diff options
Diffstat (limited to 'src/fftw')
75 files changed, 34856 insertions, 0 deletions
diff --git a/src/fftw/config.c b/src/fftw/config.c new file mode 100644 index 0000000..84fb0ac --- /dev/null +++ b/src/fftw/config.c @@ -0,0 +1,163 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* config.c -- this file contains all the codelets the system knows about */ + +/* $Id: config.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ */ + +#include "fftw-int.h" + +/* the signature is the same as the size, for now */ +#define NOTW_CODELET(x) \ + &fftw_no_twiddle_##x##_desc +#define NOTWI_CODELET(x) \ + &fftwi_no_twiddle_##x##_desc + +#define TWIDDLE_CODELET(x) \ + &fftw_twiddle_##x##_desc + +#define TWIDDLEI_CODELET(x) \ + &fftwi_twiddle_##x##_desc + +/* automatically-generated list of codelets */ + +extern fftw_codelet_desc fftw_no_twiddle_1_desc; +extern fftw_codelet_desc fftwi_no_twiddle_1_desc; +extern fftw_codelet_desc fftw_no_twiddle_2_desc; +extern fftw_codelet_desc fftwi_no_twiddle_2_desc; +extern fftw_codelet_desc fftw_no_twiddle_3_desc; +extern fftw_codelet_desc fftwi_no_twiddle_3_desc; +extern fftw_codelet_desc fftw_no_twiddle_4_desc; +extern fftw_codelet_desc fftwi_no_twiddle_4_desc; +extern fftw_codelet_desc fftw_no_twiddle_5_desc; +extern fftw_codelet_desc fftwi_no_twiddle_5_desc; +extern 
fftw_codelet_desc fftw_no_twiddle_6_desc; +extern fftw_codelet_desc fftwi_no_twiddle_6_desc; +extern fftw_codelet_desc fftw_no_twiddle_7_desc; +extern fftw_codelet_desc fftwi_no_twiddle_7_desc; +extern fftw_codelet_desc fftw_no_twiddle_8_desc; +extern fftw_codelet_desc fftwi_no_twiddle_8_desc; +extern fftw_codelet_desc fftw_no_twiddle_9_desc; +extern fftw_codelet_desc fftwi_no_twiddle_9_desc; +extern fftw_codelet_desc fftw_no_twiddle_10_desc; +extern fftw_codelet_desc fftwi_no_twiddle_10_desc; +extern fftw_codelet_desc fftw_no_twiddle_11_desc; +extern fftw_codelet_desc fftwi_no_twiddle_11_desc; +extern fftw_codelet_desc fftw_no_twiddle_12_desc; +extern fftw_codelet_desc fftwi_no_twiddle_12_desc; +extern fftw_codelet_desc fftw_no_twiddle_13_desc; +extern fftw_codelet_desc fftwi_no_twiddle_13_desc; +extern fftw_codelet_desc fftw_no_twiddle_14_desc; +extern fftw_codelet_desc fftwi_no_twiddle_14_desc; +extern fftw_codelet_desc fftw_no_twiddle_15_desc; +extern fftw_codelet_desc fftwi_no_twiddle_15_desc; +extern fftw_codelet_desc fftw_no_twiddle_16_desc; +extern fftw_codelet_desc fftwi_no_twiddle_16_desc; +extern fftw_codelet_desc fftw_no_twiddle_32_desc; +extern fftw_codelet_desc fftwi_no_twiddle_32_desc; +extern fftw_codelet_desc fftw_no_twiddle_64_desc; +extern fftw_codelet_desc fftwi_no_twiddle_64_desc; +extern fftw_codelet_desc fftw_twiddle_2_desc; +extern fftw_codelet_desc fftwi_twiddle_2_desc; +extern fftw_codelet_desc fftw_twiddle_3_desc; +extern fftw_codelet_desc fftwi_twiddle_3_desc; +extern fftw_codelet_desc fftw_twiddle_4_desc; +extern fftw_codelet_desc fftwi_twiddle_4_desc; +extern fftw_codelet_desc fftw_twiddle_5_desc; +extern fftw_codelet_desc fftwi_twiddle_5_desc; +extern fftw_codelet_desc fftw_twiddle_6_desc; +extern fftw_codelet_desc fftwi_twiddle_6_desc; +extern fftw_codelet_desc fftw_twiddle_7_desc; +extern fftw_codelet_desc fftwi_twiddle_7_desc; +extern fftw_codelet_desc fftw_twiddle_8_desc; +extern fftw_codelet_desc fftwi_twiddle_8_desc; +extern 
fftw_codelet_desc fftw_twiddle_9_desc; +extern fftw_codelet_desc fftwi_twiddle_9_desc; +extern fftw_codelet_desc fftw_twiddle_10_desc; +extern fftw_codelet_desc fftwi_twiddle_10_desc; +extern fftw_codelet_desc fftw_twiddle_16_desc; +extern fftw_codelet_desc fftwi_twiddle_16_desc; +extern fftw_codelet_desc fftw_twiddle_32_desc; +extern fftw_codelet_desc fftwi_twiddle_32_desc; +extern fftw_codelet_desc fftw_twiddle_64_desc; +extern fftw_codelet_desc fftwi_twiddle_64_desc; + +fftw_codelet_desc *fftw_config[] = { + NOTW_CODELET(1), + NOTWI_CODELET(1), + NOTW_CODELET(2), + NOTWI_CODELET(2), + NOTW_CODELET(3), + NOTWI_CODELET(3), + NOTW_CODELET(4), + NOTWI_CODELET(4), + NOTW_CODELET(5), + NOTWI_CODELET(5), + NOTW_CODELET(6), + NOTWI_CODELET(6), + NOTW_CODELET(7), + NOTWI_CODELET(7), + NOTW_CODELET(8), + NOTWI_CODELET(8), + NOTW_CODELET(9), + NOTWI_CODELET(9), + NOTW_CODELET(10), + NOTWI_CODELET(10), + NOTW_CODELET(11), + NOTWI_CODELET(11), + NOTW_CODELET(12), + NOTWI_CODELET(12), + NOTW_CODELET(13), + NOTWI_CODELET(13), + NOTW_CODELET(14), + NOTWI_CODELET(14), + NOTW_CODELET(15), + NOTWI_CODELET(15), + NOTW_CODELET(16), + NOTWI_CODELET(16), + NOTW_CODELET(32), + NOTWI_CODELET(32), + NOTW_CODELET(64), + NOTWI_CODELET(64), + TWIDDLE_CODELET(2), + TWIDDLEI_CODELET(2), + TWIDDLE_CODELET(3), + TWIDDLEI_CODELET(3), + TWIDDLE_CODELET(4), + TWIDDLEI_CODELET(4), + TWIDDLE_CODELET(5), + TWIDDLEI_CODELET(5), + TWIDDLE_CODELET(6), + TWIDDLEI_CODELET(6), + TWIDDLE_CODELET(7), + TWIDDLEI_CODELET(7), + TWIDDLE_CODELET(8), + TWIDDLEI_CODELET(8), + TWIDDLE_CODELET(9), + TWIDDLEI_CODELET(9), + TWIDDLE_CODELET(10), + TWIDDLEI_CODELET(10), + TWIDDLE_CODELET(16), + TWIDDLEI_CODELET(16), + TWIDDLE_CODELET(32), + TWIDDLEI_CODELET(32), + TWIDDLE_CODELET(64), + TWIDDLEI_CODELET(64), + (fftw_codelet_desc *) 0 +}; diff --git a/src/fftw/config.h b/src/fftw/config.h new file mode 100644 index 0000000..ef5cd64 --- /dev/null +++ b/src/fftw/config.h @@ -0,0 +1,171 @@ +/* -*- C -*- */ +/* + * Copyright 
(c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* config.h -- build configuration options */ +/* $Id: config.h,v 1.1 2008/10/17 06:13:18 scuri Exp $ */ + +/* configuration options (guessed by configure) */ + +/* Define to empty if the keyword does not work. */ +/* #undef const */ + +/* Define if you have the gettimeofday function. */ +/* #undef HAVE_GETTIMEOFDAY */ + +/* Define if you have the BSDgettimeofday function. */ +/* #undef HAVE_BSDGETTIMEOFDAY */ + +/* Define if you have the <sys/time.h> header file. */ +/* #undef HAVE_SYS_TIME_H */ + +/* Define if you have the <unistd.h> header file. */ +/* #undef HAVE_UNISTD_H */ + +/* Define if you have the <getopt.h> header file. 
*/ +/* #undef HAVE_GETOPT_H */ + +/* Define if you have the <malloc.h> header file */ +/* #undef HAVE_MALLOC_H */ + +/* Define if you have gethrtime() a la Solaris 2 */ +/* #undef HAVE_GETHRTIME */ +/* #undef HAVE_HRTIME_T */ + +/* Define to sizeof int and long long, if available: */ +#define SIZEOF_INT 0 +#define SIZEOF_LONG_LONG 0 + +#if (SIZEOF_INT != 0) && (SIZEOF_LONG_LONG >= 2 * SIZEOF_INT) +# define LONGLONG_IS_TWOINTS +#endif + +/* Define to use "unsafe" modular multiply (can cause integer overflow + and errors for transforms of large prime sizes using Rader). */ +/* #undef FFTW_ENABLE_UNSAFE_MULMOD */ + +/* Define if you have getopt() */ +/* #undef HAVE_GETOPT */ + +/* Define if you have getopt_long() */ +/* #undef HAVE_GETOPT_LONG */ + +/* Define if you have isnan() */ +/* #undef HAVE_ISNAN */ + +/* Define for enabling the high resolution Pentium timer */ +/* #undef FFTW_ENABLE_PENTIUM_TIMER */ + +/* + * When using FFTW_ENABLE_PENTIUM_TIMER, set FFTW_CYCLES_PER_SEC + * to your real CPU clock speed! + */ +/* This is for 200 MHz */ +/* #define FFTW_CYCLES_PER_SEC 200000000L */ + +/* + * Define to enable a gcc/x86 specific hack that aligns + * the stack to an 8-byte boundary + */ +/* #undef FFTW_ENABLE_I386_HACKS */ + +/* Define when using a version of gcc that aligns the stack properly */ +/* #undef FFTW_GCC_ALIGNS_STACK */ + +/* Define to enable extra runtime checks for debugging. */ +/* #undef FFTW_DEBUG */ + +/* Define to enable vector-recurse feature. */ +/* #undef FFTW_ENABLE_VECTOR_RECURSE */ + +/* + * Define to enable extra runtime checks for the alignment of variables + * in the codelets (causes coredump for misaligned double on x86). 
+ */ +/* #undef FFTW_DEBUG_ALIGNMENT */ + +#define FFTW_VERSION "2.1.5" + +/* Use Win32 high-resolution timer */ +#if defined(__WIN32__) || defined(WIN32) || defined(_WINDOWS) +# define HAVE_WIN32_TIMER +# define HAVE_WIN32 +#endif + +/* Use MacOS Time Manager timer */ +#if defined(MAC) || defined(macintosh) +# define HAVE_MAC_TIMER +# define HAVE_MACOS + +/* Define to use nanosecond timer on PCI PowerMacs: */ +/* (WARNING: experimental, use at your own risk.) */ +/* #undef HAVE_MAC_PCI_TIMER */ +#endif + +/* define if you have alloca.h: */ +/* #undef HAVE_ALLOCA_H */ + +/* define if you have the alloca function: */ +/* #undef HAVE_ALLOCA */ + +/************************** threads configuration ************************/ + +/* The following preprocessor symbols select which threads library + to use when compiling the FFTW threads parallel libraries: */ + +/* #undef FFTW_USING_SOLARIS_THREADS */ +/* #undef FFTW_USING_POSIX_THREADS */ +/* #undef FFTW_USING_BEOS_THREADS */ +/* #undef FFTW_USING_MACH_THREADS */ +/* #undef FFTW_USING_OPENMP_THREADS */ +/* #undef FFTW_USING_SGIMP_THREADS */ + +/* on AIX, this gets defined to PTHREAD_CREATE_UNDETACHED, as that + system uses a non-standard name for this attribute (sigh). */ +/* #undef PTHREAD_CREATE_JOINABLE */ + +/* #undef HAVE_MACH_CTHREADS_H */ +/* #undef HAVE_CTHREADS_H */ +/* #undef HAVE_CTHREAD_H */ + +#ifdef HAVE_WIN32 +#define FFTW_USING_WIN32_THREADS +#endif + +#ifdef HAVE_MACOS +#define FFTW_USING_MACOS_THREADS +#endif + +/*********************** fortran wrapper configuration *********************/ + +/* F77_FUNC_ is defined to a macro F77_FUNC_(name,NAME) by autoconf, that + takes an identifier name (lower case) and NAME (upper case) and returns + the appropriately mangled identifier for the Fortran linker. On + non-Unix systems you will have to define this manually. 
For example, + if your linker converts identifiers to lower-case followed by an + underscore, you would do: #define F77_FUNC_(name,NAME) name ## _ +*/ +/* #undef F77_FUNC_ */ + +/* The following symbols control how MPI_Comm data structures are + translated between Fortran and C for the fftw_mpi wrappers. See + the file mpi/fftw_f77_mpi.h for more information. */ +/* #undef HAVE_MPI_COMM_F2C */ +/* #undef FFTW_USE_F77_MPI_COMM */ +/* #undef FFTW_USE_F77_MPI_COMM_P */ diff --git a/src/fftw/executor.c b/src/fftw/executor.c new file mode 100644 index 0000000..fb200ab --- /dev/null +++ b/src/fftw/executor.c @@ -0,0 +1,465 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* + * executor.c -- execute the fft + */ + +/* $Id: executor.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ */ +#include "fftw-int.h" +#include <stdio.h> +#include <stdlib.h> + +const char *fftw_version = "FFTW V" FFTW_VERSION " ($Id: executor.c,v 1.1 2008/10/17 06:13:18 scuri Exp $)"; + +/* + * This function is called in other files, so we cannot declare + * it static. 
+ */ /* Copies the n contiguous complex values in[0..n-1] to out[0], out[ostride], ..., out[(n - 1) * ostride]. The first (n & 3) elements are copied one at a time so that the count left for the 4-way unrolled loop is a multiple of 4. */ +void fftw_strided_copy(int n, fftw_complex *in, int ostride, + fftw_complex *out) +{ + int i; + fftw_real r0, r1, i0, i1; + fftw_real r2, r3, i2, i3; + + i = 0; + + for (; i < (n & 3); ++i) { + out[i * ostride] = in[i]; + } + + for (; i < n; i += 4) { + r0 = c_re(in[i]); + i0 = c_im(in[i]); + r1 = c_re(in[i + 1]); + i1 = c_im(in[i + 1]); + r2 = c_re(in[i + 2]); + i2 = c_im(in[i + 2]); + r3 = c_re(in[i + 3]); + i3 = c_im(in[i + 3]); + c_re(out[i * ostride]) = r0; + c_im(out[i * ostride]) = i0; + c_re(out[(i + 1) * ostride]) = r1; + c_im(out[(i + 1) * ostride]) = i1; + c_re(out[(i + 2) * ostride]) = r2; + c_im(out[(i + 2) * ostride]) = i2; + c_re(out[(i + 3) * ostride]) = r3; + c_im(out[(i + 3) * ostride]) = i3; + } +} + /* Runs howmany transforms of size n with plan node p; transform s reads from in + s * idist and writes to out + s * odist. A FFTW_NOTW node calls its codelet directly inside the loop, any other node type goes through fftw_executor_simple once per transform. */ +static void executor_many(int n, const fftw_complex *in, + fftw_complex *out, + fftw_plan_node *p, + int istride, + int ostride, + int howmany, int idist, int odist, + fftw_recurse_kind recurse_kind) +{ + int s; + + switch (p->type) { + case FFTW_NOTW: + { + fftw_notw_codelet *codelet = p->nodeu.notw.codelet; + + HACK_ALIGN_STACK_ODD; + for (s = 0; s < howmany; ++s) + codelet(in + s * idist, + out + s * odist, + istride, ostride); + break; + } + + default: + for (s = 0; s < howmany; ++s) + fftw_executor_simple(n, in + s * idist, + out + s * odist, + p, istride, ostride, + recurse_kind); + } +} + +#ifdef FFTW_ENABLE_VECTOR_RECURSE + +/* executor_many_vector is like executor_many, but it pushes the + howmany loop down to the leaves of the transform: every non-leaf + node first recurses into its r sub-transforms of size m = n / r + (each call still carrying howmany, idist and odist) and then + applies its own codelet once per vector element. An unknown + node type is reported through fftw_die. */ +static void executor_many_vector(int n, const fftw_complex *in, + fftw_complex *out, + fftw_plan_node *p, + int istride, + int ostride, + int howmany, int idist, int odist) +{ + int s; + + switch (p->type) { + case FFTW_NOTW: + { + fftw_notw_codelet *codelet = p->nodeu.notw.codelet; + + HACK_ALIGN_STACK_ODD; + for (s = 0; s < howmany; ++s) + codelet(in + s * idist, + out + s * odist, + istride, ostride); + break; + } + + case FFTW_TWIDDLE: + { + int r = p->nodeu.twiddle.size; + int m = n / r; + 
fftw_twiddle_codelet *codelet; + fftw_complex *W; + /* sub-transform s starts at in + s * istride and reads with stride istride * r; its m outputs start at out + s * (m * ostride) */ + for (s = 0; s < r; ++s) + executor_many_vector(m, in + s * istride, + out + s * (m * ostride), + p->nodeu.twiddle.recurse, + istride * r, ostride, + howmany, idist, odist); + + codelet = p->nodeu.twiddle.codelet; + W = p->nodeu.twiddle.tw->twarray; + + /* This may not be the right thing. We maybe should have + the howmany loop for the twiddle codelets at the + topmost level of the recursion, since odist is big; + i.e. separate recursions for twiddle and notwiddle. */ + HACK_ALIGN_STACK_EVEN; + for (s = 0; s < howmany; ++s) + codelet(out + s * odist, W, m * ostride, m, ostride); + + break; + } + + case FFTW_GENERIC: + { + int r = p->nodeu.generic.size; + int m = n / r; + fftw_generic_codelet *codelet; + fftw_complex *W; + + for (s = 0; s < r; ++s) + executor_many_vector(m, in + s * istride, + out + s * (m * ostride), + p->nodeu.generic.recurse, + istride * r, ostride, + howmany, idist, odist); + + codelet = p->nodeu.generic.codelet; + W = p->nodeu.generic.tw->twarray; + for (s = 0; s < howmany; ++s) + codelet(out + s * odist, W, m, r, n, ostride); + + break; + } + + case FFTW_RADER: + { + int r = p->nodeu.rader.size; + int m = n / r; + fftw_rader_codelet *codelet; + fftw_complex *W; + + for (s = 0; s < r; ++s) + executor_many_vector(m, in + s * istride, + out + s * (m * ostride), + p->nodeu.rader.recurse, + istride * r, ostride, + howmany, idist, odist); + + codelet = p->nodeu.rader.codelet; + W = p->nodeu.rader.tw->twarray; + for (s = 0; s < howmany; ++s) + codelet(out + s * odist, W, m, r, ostride, + p->nodeu.rader.rader_data); + + break; + } + + default: + fftw_die("BUG in executor: invalid plan\n"); + break; + } +} + +#endif /* FFTW_ENABLE_VECTOR_RECURSE */ + +/* + * Do *not* declare simple executor static--we need to call it + * from other files...also, preface its name with "fftw_" + * to avoid any possible name collisions. 
+ */ /* Executes one transform of size n described by plan node p, reading in with stride istride and writing out with stride ostride. A FFTW_NOTW node is a single codelet call. The twiddle, generic and Rader nodes first compute their r sub-transforms of size m = n / r (through executor_many, or through executor_many_vector when FFTW_ENABLE_VECTOR_RECURSE is defined and recurse_kind is not FFTW_NORMAL_RECURSE) and then apply the node's own codelet to out. Any other node type is reported through fftw_die. */ +void fftw_executor_simple(int n, const fftw_complex *in, + fftw_complex *out, + fftw_plan_node *p, + int istride, + int ostride, + fftw_recurse_kind recurse_kind) +{ + switch (p->type) { + case FFTW_NOTW: + HACK_ALIGN_STACK_ODD; + (p->nodeu.notw.codelet)(in, out, istride, ostride); + break; + + case FFTW_TWIDDLE: + { + int r = p->nodeu.twiddle.size; + int m = n / r; + fftw_twiddle_codelet *codelet; + fftw_complex *W; + /* the two #ifdef sections below turn the executor_many call into an if/else on recurse_kind only when vector recursion is compiled in; otherwise the call is unconditional */ +#ifdef FFTW_ENABLE_VECTOR_RECURSE + if (recurse_kind == FFTW_NORMAL_RECURSE) +#endif + executor_many(m, in, out, + p->nodeu.twiddle.recurse, + istride * r, ostride, + r, istride, m * ostride, + FFTW_NORMAL_RECURSE); +#ifdef FFTW_ENABLE_VECTOR_RECURSE + else + executor_many_vector(m, in, out, + p->nodeu.twiddle.recurse, + istride * r, ostride, + r, istride, m * ostride); +#endif + + codelet = p->nodeu.twiddle.codelet; + W = p->nodeu.twiddle.tw->twarray; + + HACK_ALIGN_STACK_EVEN; + codelet(out, W, m * ostride, m, ostride); + + break; + } + + case FFTW_GENERIC: + { + int r = p->nodeu.generic.size; + int m = n / r; + fftw_generic_codelet *codelet; + fftw_complex *W; + +#ifdef FFTW_ENABLE_VECTOR_RECURSE + if (recurse_kind == FFTW_NORMAL_RECURSE) +#endif + executor_many(m, in, out, + p->nodeu.generic.recurse, + istride * r, ostride, + r, istride, m * ostride, + FFTW_NORMAL_RECURSE); +#ifdef FFTW_ENABLE_VECTOR_RECURSE + else + executor_many_vector(m, in, out, + p->nodeu.generic.recurse, + istride * r, ostride, + r, istride, m * ostride); +#endif + + codelet = p->nodeu.generic.codelet; + W = p->nodeu.generic.tw->twarray; + codelet(out, W, m, r, n, ostride); + + break; + } + + case FFTW_RADER: + { + int r = p->nodeu.rader.size; + int m = n / r; + fftw_rader_codelet *codelet; + fftw_complex *W; + +#ifdef FFTW_ENABLE_VECTOR_RECURSE + if (recurse_kind == FFTW_NORMAL_RECURSE) +#endif + executor_many(m, in, out, + p->nodeu.rader.recurse, + istride * r, ostride, + r, istride, m * ostride, + FFTW_NORMAL_RECURSE); +#ifdef FFTW_ENABLE_VECTOR_RECURSE + else + 
executor_many_vector(m, in, out, + p->nodeu.rader.recurse, + istride * r, ostride, + r, istride, m * ostride); +#endif + + codelet = p->nodeu.rader.codelet; + W = p->nodeu.rader.tw->twarray; + codelet(out, W, m, r, ostride, + p->nodeu.rader.rader_data); + + break; + } + + default: + fftw_die("BUG in executor: invalid plan\n"); + break; + } +} + /* In-place transform of the n elements of in (stride istride). A FFTW_NOTW codelet is called with in as both source and destination. Any other node computes the result into a contiguous scratch buffer (out when it is non-NULL, otherwise n elements obtained from fftw_malloc and released with fftw_free before returning) and then copies it back over in with fftw_strided_copy. */ +static void executor_simple_inplace(int n, fftw_complex *in, + fftw_complex *out, + fftw_plan_node *p, + int istride, + fftw_recurse_kind recurse_kind) +{ + switch (p->type) { + case FFTW_NOTW: + HACK_ALIGN_STACK_ODD; + (p->nodeu.notw.codelet)(in, in, istride, istride); + break; + + default: + { + fftw_complex *tmp; + + if (out) + tmp = out; + else + tmp = (fftw_complex *) + fftw_malloc(n * sizeof(fftw_complex)); + + fftw_executor_simple(n, in, tmp, p, istride, 1, + recurse_kind); + fftw_strided_copy(n, tmp, istride, in); + + if (!out) + fftw_free(tmp); + } + } +} + /* Same as executor_simple_inplace, repeated for howmany transforms whose first elements are idist apart; one scratch buffer of n elements is reused for all of them. */ +static void executor_many_inplace(int n, fftw_complex *in, + fftw_complex *out, + fftw_plan_node *p, + int istride, + int howmany, int idist, + fftw_recurse_kind recurse_kind) +{ + switch (p->type) { + case FFTW_NOTW: + { + fftw_notw_codelet *codelet = p->nodeu.notw.codelet; + int s; + + HACK_ALIGN_STACK_ODD; + for (s = 0; s < howmany; ++s) + codelet(in + s * idist, + in + s * idist, + istride, istride); + break; + } + + default: + { + int s; + fftw_complex *tmp; + if (out) + tmp = out; + else + tmp = (fftw_complex *) + fftw_malloc(n * sizeof(fftw_complex)); + + for (s = 0; s < howmany; ++s) { + fftw_executor_simple(n, + in + s * idist, + tmp, + p, istride, 1, recurse_kind); + fftw_strided_copy(n, tmp, istride, in + s * idist); + } + + if (!out) + fftw_free(tmp); + } + } +} + +/* user interface: runs howmany transforms of size plan->n. For a plan flagged FFTW_IN_PLACE the result overwrites in, ostride and odist are not used, and out (if non-NULL) only serves as scratch space; otherwise transform s reads from in + s * idist and writes to out + s * odist. */ +void fftw(fftw_plan plan, int howmany, fftw_complex *in, int istride, + int idist, fftw_complex *out, int ostride, int odist) +{ + int n = plan->n; + + if (plan->flags & FFTW_IN_PLACE) { + if (howmany == 1) { + executor_simple_inplace(n, in, out, plan->root, istride, 
+ plan->recurse_kind); + } else { + executor_many_inplace(n, in, out, plan->root, istride, howmany, + idist, plan->recurse_kind); + } + } else { + if (howmany == 1) { + fftw_executor_simple(n, in, out, plan->root, istride, ostride, + plan->recurse_kind); + } else { +#ifdef FFTW_ENABLE_VECTOR_RECURSE + int vector_size = plan->vector_size; + if (vector_size <= 1) +#endif + executor_many(n, in, out, plan->root, istride, ostride, + howmany, idist, odist, plan->recurse_kind); +#ifdef FFTW_ENABLE_VECTOR_RECURSE + else { + int s; + int num_vects = howmany / vector_size; + fftw_plan_node *root = plan->root; + /* full batches of vector_size transforms go through the vector executor */ + for (s = 0; s < num_vects; ++s) + executor_many_vector(n, + in + s * (vector_size * idist), + out + s * (vector_size * odist), + root, + istride, ostride, + vector_size, idist, odist); + /* the howmany % vector_size leftover transforms fall back to executor_many */ + s = howmany % vector_size; + if (s > 0) + executor_many(n, + in + num_vects * (vector_size * idist), + out + num_vects * (vector_size * odist), + root, + istride, ostride, + s, idist, odist, + FFTW_NORMAL_RECURSE); + } +#endif + } + } +} + /* Runs a single transform with unit input and output strides; a plan flagged FFTW_IN_PLACE overwrites in and passes out on as the optional scratch buffer. */ +void fftw_one(fftw_plan plan, fftw_complex *in, fftw_complex *out) +{ + int n = plan->n; + + if (plan->flags & FFTW_IN_PLACE) + executor_simple_inplace(n, in, out, plan->root, 1, + plan->recurse_kind); + else + fftw_executor_simple(n, in, out, plan->root, 1, 1, + plan->recurse_kind); +} diff --git a/src/fftw/fftw-int.h b/src/fftw/fftw-int.h new file mode 100644 index 0000000..2c363fc --- /dev/null +++ b/src/fftw/fftw-int.h @@ -0,0 +1,500 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* fftw-int.h -- internal definitions: private function prototypes, overflow-safe multiply, floating-point helpers, stack-alignment hacks and timers */ +/* $Id: fftw-int.h,v 1.1 2008/10/17 06:13:18 scuri Exp $ */ + +#ifndef FFTW_INT_H +#define FFTW_INT_H +#include "config.h" +#include "fftw.h" + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/****************************************************************************/ +/* Private Functions */ +/****************************************************************************/ + +extern fftw_twiddle *fftw_create_twiddle(int n, const fftw_codelet_desc *d); +extern void fftw_destroy_twiddle(fftw_twiddle *tw); + /* parameter order of the two prototypes below, as defined in executor.c: fftw_strided_copy(n, in, ostride, out) and fftw_executor_simple(n, in, out, p, istride, ostride, recurse_kind) */ +extern void fftw_strided_copy(int, fftw_complex *, int, fftw_complex *); +extern void fftw_executor_simple(int, const fftw_complex *, fftw_complex *, + fftw_plan_node *, int, int, + fftw_recurse_kind recurse_kind); + +extern fftwnd_plan fftwnd_create_plan_aux(int rank, const int *n, + fftw_direction dir, int flags); +extern fftw_plan *fftwnd_new_plan_array(int rank); +extern fftw_plan *fftwnd_create_plans_generic(fftw_plan *plans, + int rank, const int *n, + fftw_direction dir, int flags); +extern fftw_plan *fftwnd_create_plans_specific(fftw_plan *plans, + int rank, const int *n, + const int *n_after, + fftw_direction dir, int flags, + fftw_complex *in, int istride, + fftw_complex *out, int ostride); +extern int fftwnd_work_size(int rank, const int *n, int flags, int ncopies); + +extern void fftwnd_aux(fftwnd_plan p, int cur_dim, + fftw_complex *in, int istride, + fftw_complex *out, int ostride, + fftw_complex *work); +extern void 
fftwnd_aux_howmany(fftwnd_plan p, int cur_dim, + int howmany, + fftw_complex *in, int istride, int idist, + fftw_complex *out, int ostride, int odist, + fftw_complex *work); + +/* wisdom prototypes */ +enum fftw_wisdom_category { + FFTW_WISDOM, RFFTW_WISDOM +}; + +extern int fftw_wisdom_lookup(int n, int flags, fftw_direction dir, + enum fftw_wisdom_category category, + int istride, int ostride, + enum fftw_node_type *type, + int *signature, + fftw_recurse_kind *recurse_kind, int replace_p); +extern void fftw_wisdom_add(int n, int flags, fftw_direction dir, + enum fftw_wisdom_category cat, + int istride, int ostride, + enum fftw_node_type type, + int signature, + fftw_recurse_kind recurse_kind); + +/* Private planner functions: */ +extern double fftw_estimate_node(fftw_plan_node *p); +extern fftw_plan_node *fftw_make_node_notw(int size, + const fftw_codelet_desc *config); +extern fftw_plan_node *fftw_make_node_real2hc(int size, + const fftw_codelet_desc *config); +extern fftw_plan_node *fftw_make_node_hc2real(int size, + const fftw_codelet_desc *config); +extern fftw_plan_node *fftw_make_node_twiddle(int n, + const fftw_codelet_desc *config, + fftw_plan_node *recurse, + int flags); +extern fftw_plan_node *fftw_make_node_hc2hc(int n, + fftw_direction dir, + const fftw_codelet_desc *config, + fftw_plan_node *recurse, + int flags); +extern fftw_plan_node *fftw_make_node_generic(int n, int size, + fftw_generic_codelet *codelet, + fftw_plan_node *recurse, + int flags); +extern fftw_plan_node *fftw_make_node_rgeneric(int n, int size, + fftw_direction dir, + fftw_rgeneric_codelet * codelet, + fftw_plan_node *recurse, + int flags); +extern int fftw_factor(int n); +extern fftw_plan_node *fftw_make_node(void); +extern fftw_plan fftw_make_plan(int n, fftw_direction dir, + fftw_plan_node *root, int flags, + enum fftw_node_type wisdom_type, + int wisdom_signature, + fftw_recurse_kind recurse_kind, + int vector_size); +extern void fftw_use_plan(fftw_plan p); +extern void 
fftw_use_node(fftw_plan_node *p); +extern void fftw_destroy_plan_internal(fftw_plan p); +extern fftw_plan fftw_pick_better(fftw_plan p1, fftw_plan p2); +extern fftw_plan fftw_lookup(fftw_plan *table, int n, int flags, + int vector_size); +extern void fftw_insert(fftw_plan *table, fftw_plan this_plan); +extern void fftw_make_empty_table(fftw_plan *table); +extern void fftw_destroy_table(fftw_plan *table); +extern void fftw_complete_twiddle(fftw_plan_node *p, int n); + +extern fftw_plan_node *fftw_make_node_rader(int n, int size, + fftw_direction dir, + fftw_plan_node *recurse, + int flags); +extern fftw_rader_data *fftw_rader_top; + +/* undocumented debugging hook */ +typedef void (*fftw_plan_hook_ptr) (fftw_plan plan); +extern DL_IMPORT(fftw_plan_hook_ptr) fftw_plan_hook; +extern DL_IMPORT(fftw_plan_hook_ptr) rfftw_plan_hook; + +/****************************************************************************/ +/* Overflow-safe multiply */ +/****************************************************************************/ + +/* The Rader routines do a lot of operations of the form (x * y) % p, which + are vulnerable to overflow problems for large p. To get around this, + we either use "long long" arithmetic (if it is available and double + the size of int), or default to a subroutine defined in twiddle.c. */ + +#if defined(FFTW_ENABLE_UNSAFE_MULMOD) +# define MULMOD(x,y,p) (((x) * (y)) % (p)) +#elif defined(LONGLONG_IS_TWOINTS) +# define MULMOD(x,y,p) ((int) ((((long long) (x)) * ((long long) (y))) \ + % ((long long) (p)))) +#else +# define USE_FFTW_SAFE_MULMOD +# define MULMOD(x,y,p) fftw_safe_mulmod(x,y,p) +extern int fftw_safe_mulmod(int x, int y, int p); +#endif + +/****************************************************************************/ +/* Floating Point Types */ +/****************************************************************************/ + +/* + * We use these definitions to make it easier for people to change + * FFTW to use long double and similar types. 
You shouldn't have to + * change this just to use float or double. + */ + +/* + * Change this if your floating-point constants need to be expressed + * in a special way. For example, if fftw_real is long double, you + * will need to append L to your fp constants to make them of the + * same precision. Do this by changing "x" below to "x##L". + */ +#define FFTW_KONST(x) ((fftw_real) x) + +/* + * Ordinarily, we use the standard sin/cos functions to compute trig. + * constants. You'll need to change these if fftw_real has more + * than double precision. + */ +#define FFTW_TRIG_SIN sin +#define FFTW_TRIG_COS cos +typedef double FFTW_TRIG_REAL; /* the argument type for sin and cos */ + +#define FFTW_K2PI FFTW_KONST(6.2831853071795864769252867665590057683943388) + +/****************************************************************************/ +/* gcc/x86 hacks */ +/****************************************************************************/ + +/* + * gcc 2.[78].x and x86 specific hacks. These macros align the stack + * pointer so that the double precision temporary variables in the + * codelets will be aligned to a multiple of 8 bytes (*way* faster on + * pentium and pentiumpro) + */ +#ifdef __GNUC__ +# ifdef __i386__ +# ifdef FFTW_ENABLE_I386_HACKS +# ifndef FFTW_GCC_ALIGNS_STACK +# ifndef FFTW_ENABLE_FLOAT +# define FFTW_USING_I386_HACKS +# define HACK_ALIGN_STACK_EVEN { \ + if ((((long) (__builtin_alloca(0))) & 0x7)) __builtin_alloca(4); \ + } + +# define HACK_ALIGN_STACK_ODD { \ + if (!(((long) (__builtin_alloca(0))) & 0x7)) __builtin_alloca(4); \ + } + +# endif /* ! FFTW_ENABLE_FLOAT */ +# endif /* ! 
FFTW_GCC_ALIGNS_STACK */ +# endif /* FFTW_ENABLE_I386_HACKS */ + +# ifdef FFTW_DEBUG_ALIGNMENT +# define ASSERT_ALIGNED_DOUBLE { \ + double __foo; \ + if ((((long) &__foo) & 0x7)) abort(); \ + } +# endif /* FFTW_DEBUG_ALIGNMENT */ + +# endif /* __i386__ */ +#endif /* __GNUC__ */ + +#ifndef HACK_ALIGN_STACK_EVEN +# define HACK_ALIGN_STACK_EVEN {} +#endif +#ifndef HACK_ALIGN_STACK_ODD +# define HACK_ALIGN_STACK_ODD {} +#endif +#ifndef ASSERT_ALIGNED_DOUBLE +# define ASSERT_ALIGNED_DOUBLE {} +#endif + +/****************************************************************************/ +/* Timers */ +/****************************************************************************/ + +/* + * Here, you can use all the nice timers available in your machine. + */ + +/* + * + Things you should define to include your own clock: + + fftw_time -- the data type used to store a time + + extern fftw_time fftw_get_time(void); + -- a function returning the current time. (We have + implemented this as a macro in most cases.) + + extern fftw_time fftw_time_diff(fftw_time t1, fftw_time t2); + -- returns the time difference (t1 - t2). + If t1 < t2, it may simply return zero (although this + is not required). (We have implemented this as a macro + in most cases.) + + extern double fftw_time_to_sec(fftw_time t); + -- returns the time t expressed in seconds, as a double. + (Implemented as a macro in most cases.) + + FFTW_TIME_MIN -- a double-precision macro holding the minimum + time interval (in seconds) for accurate time measurements. + This should probably be at least 100 times the precision of + your clock (we use even longer intervals, to be conservative). + This will determine how long the planner takes to measure + the speeds of different possible plans. + + Bracket all of your definitions with an appropriate #ifdef so that + they will be enabled on your machine. If you do add your own + high-precision timer code, let us know (at fftw@fftw.org). 
+ + Only declarations should go in this file. Any function definitions + that you need should go into timer.c. + */ + +/* + * define a symbol so that we know that we have the fftw_time_diff + * function/macro (it did not exist prior to FFTW 1.2) + */ +#define FFTW_HAS_TIME_DIFF + +/********************************************** + * SOLARIS + **********************************************/ +#if defined(HAVE_GETHRTIME) && defined(HAVE_HRTIME_T) + +/* we use the nanosecond virtual timer */ +#ifdef HAVE_SYS_TIME_H +#include <sys/time.h> +#endif + +typedef hrtime_t fftw_time; + +#define fftw_get_time() gethrtime() +#define fftw_time_diff(t1,t2) ((t1) - (t2)) +#define fftw_time_to_sec(t) ((double) t / 1.0e9) + +/* + * a measurement is valid if it runs for at least + * FFTW_TIME_MIN seconds. + */ +#define FFTW_TIME_MIN (1.0e-4) /* for Solaris nanosecond timer */ +#define FFTW_TIME_REPEAT 8 + +/********************************************** + * Pentium time stamp counter + **********************************************/ +#elif defined(__GNUC__) && defined(__i386__) && defined(FFTW_ENABLE_PENTIUM_TIMER) + +/* + * Use internal Pentium register (time stamp counter). Resolution + * is 1/FFTW_CYCLES_PER_SEC seconds (e.g. 5 ns for Pentium 200 MHz). 
+ * (This code was contributed by Wolfgang Reimer) + */ + +#ifndef FFTW_CYCLES_PER_SEC +#error "Must define FFTW_CYCLES_PER_SEC in fftw/config.h to use the Pentium cycle counter" +#endif + +typedef unsigned long long fftw_time; + +static __inline__ fftw_time read_tsc() +{ + fftw_time ret; + + __asm__ __volatile__("rdtsc": "=A" (ret)); + /* no input, nothing else clobbered */ + return ret; +} + +#define fftw_get_time() read_tsc() +#define fftw_time_diff(t1,t2) ((t1) - (t2)) +#define fftw_time_to_sec(t) (((double) (t)) / FFTW_CYCLES_PER_SEC) +#define FFTW_TIME_MIN (1.0e-4) /* for Pentium TSC register */ + +/************* generic systems having gettimeofday ************/ +#elif defined(HAVE_GETTIMEOFDAY) || defined(HAVE_BSDGETTIMEOFDAY) +#ifdef HAVE_SYS_TIME_H +#include <sys/time.h> +#endif +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +#define FFTW_USE_GETTIMEOFDAY + +typedef struct timeval fftw_time; + +extern fftw_time fftw_gettimeofday_get_time(void); +extern fftw_time fftw_gettimeofday_time_diff(fftw_time t1, fftw_time t2); +#define fftw_get_time() fftw_gettimeofday_get_time() +#define fftw_time_diff(t1, t2) fftw_gettimeofday_time_diff(t1, t2) +#define fftw_time_to_sec(t) ((double)(t).tv_sec + (double)(t).tv_usec * 1.0E-6) + +#ifndef FFTW_TIME_MIN +/* this should be fine on any system claiming a microsecond timer */ +#define FFTW_TIME_MIN (1.0e-2) +#endif + +/********************************************** + * MACINTOSH + **********************************************/ +#elif defined(HAVE_MAC_TIMER) + +/* + * By default, use the microsecond-timer in the Mac Time Manager. + * Alternatively, by changing the following #if 1 to #if 0, you + * can use the nanosecond timer available *only* on PCI PowerMacs. + * WARNING: the nanosecond timer was just a little experiment; + * I haven't gotten it to work reliably. Tips/patches are welcome. 
+ */ +#ifndef HAVE_MAC_PCI_TIMER /* use time manager */ + +/* + * Use Macintosh Time Manager routines (maximum resolution is about 20 + * microseconds). + */ +typedef struct fftw_time_struct { + unsigned long hi, lo; +} fftw_time; + +extern fftw_time get_Mac_microseconds(void); + +#define fftw_get_time() get_Mac_microseconds() + +/* define as a function instead of a macro: */ +extern fftw_time fftw_time_diff(fftw_time t1, fftw_time t2); + +#define fftw_time_to_sec(t) ((t).lo * 1.0e-6 + 4294967295.0e-6 * (t).hi) + +/* very conservative, since timer should be accurate to 20e-6: */ +/* (although this seems not to be the case in practice) */ +#define FFTW_TIME_MIN (5.0e-2) /* for MacOS Time Manager timer */ + +#else /* use nanosecond timer */ + +/* Use the nanosecond timer available on PCI PowerMacs. */ + +#include <DriverServices.h> + +typedef AbsoluteTime fftw_time; +#define fftw_get_time() UpTime() +#define fftw_time_diff(t1,t2) SubAbsoluteFromAbsolute(t1,t2) +#define fftw_time_to_sec(t) (AbsoluteToNanoseconds(t).lo * 1.0e-9) + +/* Extremely conservative minimum time: */ +/* for MacOS PCI PowerMac nanosecond timer */ +#define FFTW_TIME_MIN (5.0e-3) + +#endif /* use nanosecond timer */ + +/********************************************** + * WINDOWS + **********************************************/ +#elif defined(HAVE_WIN32_TIMER) + +#include <time.h> + +typedef unsigned long fftw_time; +extern unsigned long GetPerfTime(void); +extern double GetPerfSec(double ticks); + +#define fftw_get_time() GetPerfTime() +#define fftw_time_diff(t1,t2) ((t1) - (t2)) +#define fftw_time_to_sec(t) GetPerfSec(t) + +#define FFTW_TIME_MIN (5.0e-2) /* for Win32 timer */ + +/********************************************** + * CRAY + **********************************************/ +#elif defined(_CRAYMPP) /* Cray MPP system */ + +double SECONDR(void); /* + * I think you have to link with -lsci to + * get this + */ + +typedef double fftw_time; +#define fftw_get_time() SECONDR() +#define 
fftw_time_diff(t1,t2) ((t1) - (t2)) +#define fftw_time_to_sec(t) (t) + +#define FFTW_TIME_MIN (1.0e-1) /* for Cray MPP SECONDR timer */ + +/********************************************** + * VANILLA UNIX/ISO C SYSTEMS + **********************************************/ +/* last resort: use good old Unix clock() */ +#else + +#include <time.h> + +typedef clock_t fftw_time; + +#ifndef CLOCKS_PER_SEC +#ifdef sun +/* stupid sunos4 prototypes */ +#define CLOCKS_PER_SEC 1000000 +extern long clock(void); +#else /* not sun, we don't know CLOCKS_PER_SEC */ +#error Please define CLOCKS_PER_SEC +#endif +#endif + +#define fftw_get_time() clock() +#define fftw_time_diff(t1,t2) ((t1) - (t2)) +#define fftw_time_to_sec(t) (((double) (t)) / CLOCKS_PER_SEC) + +/* + * ***VERY*** conservative constant: this says that a + * measurement must run for 200ms in order to be valid. + * You had better check the manual of your machine + * to discover if it can do better than this + */ +#define FFTW_TIME_MIN (2.0e-1) /* for default clock() timer */ + +#endif /* UNIX clock() */ + +/* take FFTW_TIME_REPEAT measurements... */ +#ifndef FFTW_TIME_REPEAT +#define FFTW_TIME_REPEAT 4 +#endif + +/* but do not run for more than TIME_LIMIT seconds while measuring one FFT */ +#ifndef FFTW_TIME_LIMIT +#define FFTW_TIME_LIMIT 2.0 +#endif + +#ifdef __cplusplus +} /* extern "C" */ + +#endif /* __cplusplus */ + +#endif /* FFTW_INT_H */ diff --git a/src/fftw/fftw.h b/src/fftw/fftw.h new file mode 100644 index 0000000..3ec3c49 --- /dev/null +++ b/src/fftw/fftw.h @@ -0,0 +1,421 @@ +/* -*- C -*- */ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* fftw.h -- system-wide definitions */ +/* $Id: fftw.h,v 1.1 2008/10/17 06:13:18 scuri Exp $ */ + +#ifndef FFTW_H +#define FFTW_H + +#include <stdlib.h> +#include <stdio.h> + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/* Define for using single precision */ +/* + * If you can, use configure --enable-float instead of changing this + * flag directly + */ +/* #undef FFTW_ENABLE_FLOAT */ + +/* our real numbers */ +#ifdef FFTW_ENABLE_FLOAT +typedef float fftw_real; +#else +typedef double fftw_real; +#endif + +/********************************************* + * Complex numbers and operations + *********************************************/ +typedef struct { + fftw_real re, im; +} fftw_complex; +#define c_re(c) ((c).re) +#define c_im(c) ((c).im) + +typedef enum { + FFTW_FORWARD = -1, FFTW_BACKWARD = 1 +} fftw_direction; + +/* backward compatibility with FFTW-1.3 */ +typedef fftw_complex FFTW_COMPLEX; +typedef fftw_real FFTW_REAL; + +#ifndef FFTW_1_0_COMPATIBILITY +#define FFTW_1_0_COMPATIBILITY 0 +#endif + +#if FFTW_1_0_COMPATIBILITY +/* backward compatibility with FFTW-1.0 */ +#define REAL fftw_real +#define COMPLEX fftw_complex +#endif + +/********************************************* + * Success or failure status + *********************************************/ + +typedef enum { + FFTW_SUCCESS = 0, FFTW_FAILURE = -1 +} fftw_status; + +/********************************************* + * Codelets + *********************************************/ +typedef void (fftw_notw_codelet) + 
(const fftw_complex *, fftw_complex *, int, int); +typedef void (fftw_twiddle_codelet) + (fftw_complex *, const fftw_complex *, int, + int, int); +typedef void (fftw_generic_codelet) + (fftw_complex *, const fftw_complex *, int, + int, int, int); +typedef void (fftw_real2hc_codelet) + (const fftw_real *, fftw_real *, fftw_real *, + int, int, int); +typedef void (fftw_hc2real_codelet) + (const fftw_real *, const fftw_real *, + fftw_real *, int, int, int); +typedef void (fftw_hc2hc_codelet) + (fftw_real *, const fftw_complex *, + int, int, int); +typedef void (fftw_rgeneric_codelet) + (fftw_real *, const fftw_complex *, int, + int, int, int); + +/********************************************* + * Configurations + *********************************************/ +/* + * A configuration is a database of all known codelets + */ + +enum fftw_node_type { + FFTW_NOTW, FFTW_TWIDDLE, FFTW_GENERIC, FFTW_RADER, + FFTW_REAL2HC, FFTW_HC2REAL, FFTW_HC2HC, FFTW_RGENERIC +}; + +/* description of a codelet */ +typedef struct { + const char *name; /* name of the codelet */ + void (*codelet) (); /* pointer to the codelet itself (unprototyped; presumably cast to the codelet typedef matching `type' before use -- TODO confirm) */ + int size; /* size of the codelet */ + fftw_direction dir; /* direction */ + enum fftw_node_type type; /* kind of codelet (an fftw_node_type value) */ + int signature; /* unique id */ + int ntwiddle; /* number of twiddle factors */ + const int *twiddle_order; /* + * array that determines the order + * in which the codelet expects + * the twiddle factors + */ +} fftw_codelet_desc; + +/* On Win32, you need to do funny things to access global variables + in shared libraries. Thanks to Andrew Sterian for this hack. 
+ */ +#ifdef HAVE_WIN32 +# if defined(BUILD_FFTW_DLL) +# define DL_IMPORT(type) __declspec(dllexport) type +# elif defined(USE_FFTW_DLL) +# define DL_IMPORT(type) __declspec(dllimport) type +# else +# define DL_IMPORT(type) type +# endif +#else +# define DL_IMPORT(type) type +#endif + +extern DL_IMPORT(const char *) fftw_version; + +/***************************** + * Plans + *****************************/ +/* + * A plan is a sequence of reductions to compute a FFT of + * a given size. At each step, the FFT algorithm can: + * + * 1) apply a notw codelet, or + * 2) recurse and apply a twiddle codelet, or + * 3) apply the generic codelet. + */ + +/* structure that contains twiddle factors */ +typedef struct fftw_twiddle_struct { + int n; + const fftw_codelet_desc *cdesc; + fftw_complex *twarray; + struct fftw_twiddle_struct *next; + int refcnt; +} fftw_twiddle; + +typedef struct fftw_rader_data_struct { + struct fftw_plan_struct *plan; + fftw_complex *omega; + int g, ginv; + int p, flags, refcount; + struct fftw_rader_data_struct *next; + fftw_codelet_desc *cdesc; +} fftw_rader_data; + +typedef void (fftw_rader_codelet) + (fftw_complex *, const fftw_complex *, int, + int, int, fftw_rader_data *); + +/* structure that holds all the data needed for a given step */ +typedef struct fftw_plan_node_struct { + enum fftw_node_type type; + + union { + /* nodes of type FFTW_NOTW */ + struct { + int size; + fftw_notw_codelet *codelet; + const fftw_codelet_desc *codelet_desc; + } notw; + + /* nodes of type FFTW_TWIDDLE */ + struct { + int size; + fftw_twiddle_codelet *codelet; + fftw_twiddle *tw; + struct fftw_plan_node_struct *recurse; + const fftw_codelet_desc *codelet_desc; + } twiddle; + + /* nodes of type FFTW_GENERIC */ + struct { + int size; + fftw_generic_codelet *codelet; + fftw_twiddle *tw; + struct fftw_plan_node_struct *recurse; + } generic; + + /* nodes of type FFTW_RADER */ + struct { + int size; + fftw_rader_codelet *codelet; + fftw_rader_data *rader_data; + 
fftw_twiddle *tw; + struct fftw_plan_node_struct *recurse; + } rader; + + /* nodes of type FFTW_REAL2HC */ + struct { + int size; + fftw_real2hc_codelet *codelet; + const fftw_codelet_desc *codelet_desc; + } real2hc; + + /* nodes of type FFTW_HC2REAL */ + struct { + int size; + fftw_hc2real_codelet *codelet; + const fftw_codelet_desc *codelet_desc; + } hc2real; + + /* nodes of type FFTW_HC2HC */ + struct { + int size; + fftw_direction dir; + fftw_hc2hc_codelet *codelet; + fftw_twiddle *tw; + struct fftw_plan_node_struct *recurse; + const fftw_codelet_desc *codelet_desc; + } hc2hc; + + /* nodes of type FFTW_RGENERIC */ + struct { + int size; + fftw_direction dir; + fftw_rgeneric_codelet *codelet; + fftw_twiddle *tw; + struct fftw_plan_node_struct *recurse; + } rgeneric; + } nodeu; + + int refcnt; +} fftw_plan_node; + +typedef enum { + FFTW_NORMAL_RECURSE = 0, + FFTW_VECTOR_RECURSE = 1 +} fftw_recurse_kind; + +struct fftw_plan_struct { + int n; + int refcnt; + fftw_direction dir; + int flags; + int wisdom_signature; + enum fftw_node_type wisdom_type; + struct fftw_plan_struct *next; + fftw_plan_node *root; + double cost; + fftw_recurse_kind recurse_kind; + int vector_size; +}; + +typedef struct fftw_plan_struct *fftw_plan; + +/* flags for the planner (bit values, combined with bitwise OR) */ +#define FFTW_ESTIMATE (0) +#define FFTW_MEASURE (1) + +#define FFTW_OUT_OF_PLACE (0) +#define FFTW_IN_PLACE (8) +#define FFTW_USE_WISDOM (16) + +#define FFTW_THREADSAFE (128) /* guarantee plan is read-only so that the + same plan can be used in parallel by + multiple threads */ + +#define FFTWND_FORCE_BUFFERED (256) /* internal flag, forces buffering + in fftwnd transforms */ + +#define FFTW_NO_VECTOR_RECURSE (512) /* internal flag, prevents use + of vector recursion */ + +extern fftw_plan fftw_create_plan_specific(int n, fftw_direction dir, + int flags, + fftw_complex *in, int istride, + fftw_complex *out, int ostride); +#define FFTW_HAS_PLAN_SPECIFIC +extern fftw_plan 
fftw_create_plan(int n, fftw_direction dir, int 
flags); +extern void fftw_print_plan(fftw_plan plan); +extern void fftw_destroy_plan(fftw_plan plan); +extern void fftw(fftw_plan plan, int howmany, fftw_complex *in, int istride, + int idist, fftw_complex *out, int ostride, int odist); +extern void fftw_one(fftw_plan plan, fftw_complex *in, fftw_complex *out); +extern void fftw_die(const char *s); +extern void *fftw_malloc(size_t n); +extern void fftw_free(void *p); +extern void fftw_check_memory_leaks(void); +extern void fftw_print_max_memory_usage(void); + +typedef void *(*fftw_malloc_type_function) (size_t n); +typedef void (*fftw_free_type_function) (void *p); +typedef void (*fftw_die_type_function) (const char *errString); +extern DL_IMPORT(fftw_malloc_type_function) fftw_malloc_hook; +extern DL_IMPORT(fftw_free_type_function) fftw_free_hook; +extern DL_IMPORT(fftw_die_type_function) fftw_die_hook; + +extern size_t fftw_sizeof_fftw_real(void); + +/* Wisdom: */ +/* + * define this symbol so that users know we are using a version of FFTW + * with wisdom + */ +#define FFTW_HAS_WISDOM +extern void fftw_forget_wisdom(void); +extern void fftw_export_wisdom(void (*emitter) (char c, void *), void *data); +extern fftw_status fftw_import_wisdom(int (*g) (void *), void *data); +extern void fftw_export_wisdom_to_file(FILE *output_file); +extern fftw_status fftw_import_wisdom_from_file(FILE *input_file); +extern char *fftw_export_wisdom_to_string(void); +extern fftw_status fftw_import_wisdom_from_string(const char *input_string); + +/* + * define symbol so we know this function is available (it is not in + * older FFTWs) + */ +#define FFTW_HAS_FPRINT_PLAN +extern void fftw_fprint_plan(FILE *f, fftw_plan plan); + +/***************************** + * N-dimensional code + *****************************/ +typedef struct { + int is_in_place; /* 1 if for in-place FFTs, 0 otherwise */ + + int rank; /* + * the rank (number of dimensions) of the + * array to be FFTed + */ + int *n; /* + * the dimensions of the array to be + * FFTed 
+ */ + fftw_direction dir; + + int *n_before; /* + * n_before[i] = product of n[j] for j < i + */ + int *n_after; /* n_after[i] = product of n[j] for j > i */ + + fftw_plan *plans; /* 1d fftw plans for each dimension */ + + int nbuffers, nwork; + fftw_complex *work; /* + * work array big enough to hold + * nbuffers+1 of the largest dimension + * (has nwork elements) + */ +} fftwnd_data; + +typedef fftwnd_data *fftwnd_plan; + +/* Initializing the FFTWND plan: */ +extern fftwnd_plan fftw2d_create_plan(int nx, int ny, fftw_direction dir, + int flags); +extern fftwnd_plan fftw3d_create_plan(int nx, int ny, int nz, + fftw_direction dir, int flags); +extern fftwnd_plan fftwnd_create_plan(int rank, const int *n, + fftw_direction dir, + int flags); + +extern fftwnd_plan fftw2d_create_plan_specific(int nx, int ny, + fftw_direction dir, + int flags, + fftw_complex *in, int istride, + fftw_complex *out, int ostride); +extern fftwnd_plan fftw3d_create_plan_specific(int nx, int ny, int nz, + fftw_direction dir, int flags, + fftw_complex *in, int istride, + fftw_complex *out, int ostride); +extern fftwnd_plan fftwnd_create_plan_specific(int rank, const int *n, + fftw_direction dir, + int flags, + fftw_complex *in, int istride, + fftw_complex *out, int ostride); + +/* Freeing the FFTWND plan: */ +extern void fftwnd_destroy_plan(fftwnd_plan plan); + +/* Printing the plan: */ +extern void fftwnd_fprint_plan(FILE *f, fftwnd_plan p); +extern void fftwnd_print_plan(fftwnd_plan p); +#define FFTWND_HAS_PRINT_PLAN + +/* Computing the N-Dimensional FFT */ +extern void fftwnd(fftwnd_plan plan, int howmany, + fftw_complex *in, int istride, int idist, + fftw_complex *out, int ostride, int odist); +extern void fftwnd_one(fftwnd_plan p, fftw_complex *in, fftw_complex *out); + +#ifdef __cplusplus +} /* extern "C" */ + +#endif /* __cplusplus */ +#endif /* FFTW_H */ diff --git a/src/fftw/fftwnd.c b/src/fftw/fftwnd.c new file mode 100644 index 0000000..57354b0 --- /dev/null +++ b/src/fftw/fftwnd.c 
@@ -0,0 +1,806 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* $Id: fftwnd.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ */ + +#include "fftw-int.h" + +/* the number of buffers to use for buffered transforms: */ +#define FFTWND_NBUFFERS 8 + +/* the default number of buffers to use: */ +#define FFTWND_DEFAULT_NBUFFERS 0 + +/* the number of "padding" elements between consecutive buffer lines */ +#define FFTWND_BUFFER_PADDING 8 + +static void destroy_plan_array(int rank, fftw_plan *plans); + +static void init_test_array(fftw_complex *arr, int stride, int n) +{ + int j; + + for (j = 0; j < n; ++j) { + c_re(arr[stride * j]) = 0.0; + c_im(arr[stride * j]) = 0.0; + } +} + +/* + * Same as fftw_measure_runtime, except for fftwnd plan. 
+ */ +double fftwnd_measure_runtime(fftwnd_plan plan, + fftw_complex *in, int istride, + fftw_complex *out, int ostride) +{ + fftw_time begin, end, start; + double t, tmax, tmin; + int i, iter; + int n; + int repeat; + + if (plan->rank == 0) + return 0.0; + + n = 1; + for (i = 0; i < plan->rank; ++i) + n *= plan->n[i]; + + iter = 1; + + for (;;) { + tmin = 1.0E10; + tmax = -1.0E10; + init_test_array(in, istride, n); + + start = fftw_get_time(); + /* repeat the measurement FFTW_TIME_REPEAT times */ + for (repeat = 0; repeat < FFTW_TIME_REPEAT; ++repeat) { + begin = fftw_get_time(); + for (i = 0; i < iter; ++i) { + fftwnd(plan, 1, in, istride, 0, out, ostride, 0); + } + end = fftw_get_time(); + + t = fftw_time_to_sec(fftw_time_diff(end, begin)); + if (t < tmin) + tmin = t; + if (t > tmax) + tmax = t; + + /* do not run for too long */ + t = fftw_time_to_sec(fftw_time_diff(end, start)); + if (t > FFTW_TIME_LIMIT) + break; + } + + if (tmin >= FFTW_TIME_MIN) + break; + + iter *= 2; + } + + tmin /= (double) iter; + tmax /= (double) iter; + + return tmin; +} + +/********************** Initializing the FFTWND Plan ***********************/ + +/* Initialize everything except for the 1D plans and the work array: */ +fftwnd_plan fftwnd_create_plan_aux(int rank, const int *n, + fftw_direction dir, int flags) +{ + int i; + fftwnd_plan p; + + if (rank < 0) + return 0; + + for (i = 0; i < rank; ++i) + if (n[i] <= 0) + return 0; + + p = (fftwnd_plan) fftw_malloc(sizeof(fftwnd_data)); + p->n = 0; + p->n_before = 0; + p->n_after = 0; + p->plans = 0; + p->work = 0; + p->dir = dir; + + p->rank = rank; + p->is_in_place = flags & FFTW_IN_PLACE; + + p->nwork = 0; + p->nbuffers = 0; + + if (rank == 0) + return 0; + + p->n = (int *) fftw_malloc(sizeof(int) * rank); + p->n_before = (int *) fftw_malloc(sizeof(int) * rank); + p->n_after = (int *) fftw_malloc(sizeof(int) * rank); + p->n_before[0] = 1; + p->n_after[rank - 1] = 1; + + for (i = 0; i < rank; ++i) { + p->n[i] = n[i]; + + if (i) { + 
p->n_before[i] = p->n_before[i - 1] * n[i - 1]; + p->n_after[rank - 1 - i] = p->n_after[rank - i] * n[rank - i]; + } + } + + return p; +} + +/* create an empty new array of rank 1d plans */ +fftw_plan *fftwnd_new_plan_array(int rank) +{ + fftw_plan *plans; + int i; + + plans = (fftw_plan *) fftw_malloc(rank * sizeof(fftw_plan)); + if (!plans) + return 0; + for (i = 0; i < rank; ++i) + plans[i] = 0; + return plans; +} + +/* + * create an array of plans using the ordinary 1d fftw_create_plan, + * which allocates its own array and creates plans optimized for + * contiguous data. + */ +fftw_plan *fftwnd_create_plans_generic(fftw_plan *plans, + int rank, const int *n, + fftw_direction dir, int flags) +{ + if (rank <= 0) + return 0; + + if (plans) { + int i, j; + int cur_flags; + + for (i = 0; i < rank; ++i) { + if (i < rank - 1 || (flags & FFTW_IN_PLACE)) { + /* + * fft's except the last dimension are always in-place + */ + cur_flags = flags | FFTW_IN_PLACE; + for (j = i - 1; j >= 0 && n[i] != n[j]; --j); + } else { + cur_flags = flags; + /* + * we must create a separate plan for the last + * dimension + */ + j = -1; + } + + if (j >= 0) { + /* + * If a plan already exists for this size + * array, reuse it: + */ + plans[i] = plans[j]; + } else { + /* generate a new plan: */ + plans[i] = fftw_create_plan(n[i], dir, cur_flags); + if (!plans[i]) { + destroy_plan_array(rank, plans); + return 0; + } + } + } + } + return plans; +} + +static int get_maxdim(int rank, const int *n, int flags) +{ + int i; + int maxdim = 0; + + for (i = 0; i < rank - 1; ++i) + if (n[i] > maxdim) + maxdim = n[i]; + if (rank > 0 && flags & FFTW_IN_PLACE && n[rank - 1] > maxdim) + maxdim = n[rank - 1]; + + return maxdim; +} + +/* compute number of elements required for work array (has to + be big enough to hold ncopies of the largest dimension in + n that will need an in-place transform. 
*/ +int fftwnd_work_size(int rank, const int *n, int flags, int ncopies) +{ + return (ncopies * get_maxdim(rank, n, flags) + + (ncopies - 1) * FFTWND_BUFFER_PADDING); +} + +/* + * create plans using the fftw_create_plan_specific planner, which + * allows us to create plans for each dimension that are specialized + * for the strides that we are going to use. + */ +fftw_plan *fftwnd_create_plans_specific(fftw_plan *plans, + int rank, const int *n, + const int *n_after, + fftw_direction dir, int flags, + fftw_complex *in, int istride, + fftw_complex *out, int ostride) +{ + if (rank <= 0) + return 0; + + if (plans) { + int i, stride, cur_flags; + fftw_complex *work = 0; + int nwork; + + nwork = fftwnd_work_size(rank, n, flags, 1); + if (nwork) + work = (fftw_complex*)fftw_malloc(nwork * sizeof(fftw_complex)); + + for (i = 0; i < rank; ++i) { + /* fft's except the last dimension are always in-place */ + if (i < rank - 1) + cur_flags = flags | FFTW_IN_PLACE; + else + cur_flags = flags; + + /* stride for transforming ith dimension */ + stride = n_after[i]; + + if (cur_flags & FFTW_IN_PLACE) + plans[i] = fftw_create_plan_specific(n[i], dir, cur_flags, + in, istride * stride, + work, 1); + else + plans[i] = fftw_create_plan_specific(n[i], dir, cur_flags, + in, istride * stride, + out, ostride * stride); + if (!plans[i]) { + destroy_plan_array(rank, plans); + fftw_free(work); + return 0; + } + } + + if (work) + fftw_free(work); + } + return plans; +} + +/* + * Create an fftwnd_plan specialized for specific arrays. (These + * arrays are ignored, however, if they are NULL or if the flags do + * not include FFTW_MEASURE.) The main advantage of being provided + * arrays like this is that we can do runtime timing measurements of + * our options, without worrying about allocating excessive scratch + * space. 
+ */ +fftwnd_plan fftwnd_create_plan_specific(int rank, const int *n, + fftw_direction dir, int flags, + fftw_complex *in, int istride, + fftw_complex *out, int ostride) +{ + fftwnd_plan p; + + if (!(p = fftwnd_create_plan_aux(rank, n, dir, flags))) + return 0; + + if (!(flags & FFTW_MEASURE) || in == 0 + || (!p->is_in_place && out == 0)) { + +/**** use default plan ****/ + + p->plans = fftwnd_create_plans_generic(fftwnd_new_plan_array(rank), + rank, n, dir, flags); + if (!p->plans) { + fftwnd_destroy_plan(p); + return 0; + } + if (flags & FFTWND_FORCE_BUFFERED) + p->nbuffers = FFTWND_NBUFFERS; + else + p->nbuffers = FFTWND_DEFAULT_NBUFFERS; + + p->nwork = fftwnd_work_size(rank, n, flags, p->nbuffers + 1); + if (p->nwork && !(flags & FFTW_THREADSAFE)) { + p->work = (fftw_complex*) fftw_malloc(p->nwork + * sizeof(fftw_complex)); + if (!p->work) { + fftwnd_destroy_plan(p); + return 0; + } + } + } else { +/**** use runtime measurements to pick plan ****/ + + fftw_plan *plans_buf, *plans_nobuf; + double t_buf, t_nobuf; + + p->nwork = fftwnd_work_size(rank, n, flags, FFTWND_NBUFFERS + 1); + if (p->nwork && !(flags & FFTW_THREADSAFE)) { + p->work = (fftw_complex*) fftw_malloc(p->nwork + * sizeof(fftw_complex)); + if (!p->work) { + fftwnd_destroy_plan(p); + return 0; + } + } + else + p->work = (fftw_complex*) NULL; + + /* two possible sets of 1D plans: */ + plans_buf = fftwnd_create_plans_generic(fftwnd_new_plan_array(rank), + rank, n, dir, flags); + plans_nobuf = + fftwnd_create_plans_specific(fftwnd_new_plan_array(rank), + rank, n, p->n_after, dir, + flags, in, istride, + out, ostride); + if (!plans_buf || !plans_nobuf) { + destroy_plan_array(rank, plans_nobuf); + destroy_plan_array(rank, plans_buf); + fftwnd_destroy_plan(p); + return 0; + } + /* time the two possible plans */ + p->plans = plans_nobuf; + p->nbuffers = 0; + p->nwork = fftwnd_work_size(rank, n, flags, p->nbuffers + 1); + t_nobuf = fftwnd_measure_runtime(p, in, istride, out, ostride); + p->plans = 
plans_buf; + p->nbuffers = FFTWND_NBUFFERS; + p->nwork = fftwnd_work_size(rank, n, flags, p->nbuffers + 1); + t_buf = fftwnd_measure_runtime(p, in, istride, out, ostride); + + /* pick the better one: */ + if (t_nobuf < t_buf) { /* use unbuffered transform */ + p->plans = plans_nobuf; + p->nbuffers = 0; + + /* work array is unnecessarily large */ + if (p->work) + fftw_free(p->work); + p->work = 0; + + destroy_plan_array(rank, plans_buf); + + /* allocate a work array of the correct size: */ + p->nwork = fftwnd_work_size(rank, n, flags, p->nbuffers + 1); + if (p->nwork && !(flags & FFTW_THREADSAFE)) { + p->work = (fftw_complex*) fftw_malloc(p->nwork + * sizeof(fftw_complex)); + if (!p->work) { + fftwnd_destroy_plan(p); + return 0; + } + } + } else { /* use buffered transform */ + destroy_plan_array(rank, plans_nobuf); + } + } + + return p; +} + +fftwnd_plan fftw2d_create_plan_specific(int nx, int ny, + fftw_direction dir, int flags, + fftw_complex *in, int istride, + fftw_complex *out, int ostride) +{ + int n[2]; + + n[0] = nx; + n[1] = ny; + + return fftwnd_create_plan_specific(2, n, dir, flags, + in, istride, out, ostride); +} + +fftwnd_plan fftw3d_create_plan_specific(int nx, int ny, int nz, + fftw_direction dir, int flags, + fftw_complex *in, int istride, + fftw_complex *out, int ostride) +{ + int n[3]; + + n[0] = nx; + n[1] = ny; + n[2] = nz; + + return fftwnd_create_plan_specific(3, n, dir, flags, + in, istride, out, ostride); +} + +/* Create a generic fftwnd plan: */ + +fftwnd_plan fftwnd_create_plan(int rank, const int *n, + fftw_direction dir, int flags) +{ + return fftwnd_create_plan_specific(rank, n, dir, flags, 0, 1, 0, 1); +} + +fftwnd_plan fftw2d_create_plan(int nx, int ny, + fftw_direction dir, int flags) +{ + return fftw2d_create_plan_specific(nx, ny, dir, flags, 0, 1, 0, 1); +} + +fftwnd_plan fftw3d_create_plan(int nx, int ny, int nz, + fftw_direction dir, int flags) +{ + return fftw3d_create_plan_specific(nx, ny, nz, dir, flags, 0, 1, 0, 1); +} + 
+/************************ Freeing the FFTWND Plan ************************/ + +static void destroy_plan_array(int rank, fftw_plan *plans) +{ + if (plans) { + int i, j; + + for (i = 0; i < rank; ++i) { + for (j = i - 1; + j >= 0 && plans[i] != plans[j]; + --j); + if (j < 0 && plans[i]) + fftw_destroy_plan(plans[i]); + } + fftw_free(plans); + } +} + +void fftwnd_destroy_plan(fftwnd_plan plan) +{ + if (plan) { + destroy_plan_array(plan->rank, plan->plans); + + if (plan->n) + fftw_free(plan->n); + + if (plan->n_before) + fftw_free(plan->n_before); + + if (plan->n_after) + fftw_free(plan->n_after); + + if (plan->work) + fftw_free(plan->work); + + fftw_free(plan); + } +} + +/************************ Printing the FFTWND Plan ************************/ + +void fftwnd_fprint_plan(FILE *f, fftwnd_plan plan) +{ + if (plan) { + int i, j; + + if (plan->rank == 0) { + fprintf(f, "plan for rank 0 (null) transform.\n"); + return; + } + fprintf(f, "plan for "); + for (i = 0; i < plan->rank; ++i) + fprintf(f, "%s%d", i ? 
"x" : "", plan->n[i]); + fprintf(f, " transform:\n"); + + if (plan->nbuffers > 0) + fprintf(f, " -- using buffered transforms (%d buffers)\n", + plan->nbuffers); + else + fprintf(f, " -- using unbuffered transform\n"); + + for (i = 0; i < plan->rank; ++i) { + fprintf(f, "* dimension %d (size %d) ", i, plan->n[i]); + + for (j = i - 1; j >= 0; --j) + if (plan->plans[j] == plan->plans[i]) + break; + + if (j < 0) + fftw_fprint_plan(f, plan->plans[i]); + else + fprintf(f, "plan is same as dimension %d plan.\n", j); + } + } +} + +void fftwnd_print_plan(fftwnd_plan plan) +{ + fftwnd_fprint_plan(stdout, plan); +} + +/********************* Buffered FFTW (in-place) *********************/ + +void fftw_buffered(fftw_plan p, int howmany, + fftw_complex *in, int istride, int idist, + fftw_complex *work, + int nbuffers, fftw_complex *buffers) +{ + int i = 0, n, nb; + + n = p->n; + nb = n + FFTWND_BUFFER_PADDING; + + do { + for (; i <= howmany - nbuffers; i += nbuffers) { + fftw_complex *cur_in = in + i * idist; + int j, buf; + + /* + * First, copy nbuffers strided arrays to the + * contiguous buffer arrays (reading consecutive + * locations, assuming that idist is 1): + */ + for (j = 0; j < n; ++j) { + fftw_complex *cur_in2 = cur_in + j * istride; + fftw_complex *cur_buffers = buffers + j; + + for (buf = 0; buf <= nbuffers - 4; buf += 4) { + fftw_real r0, i0, r1, i1, r2, i2, r3, i3; + r0 = c_re(cur_in2[0]); + i0 = c_im(cur_in2[0]); + r1 = c_re(cur_in2[idist]); + i1 = c_im(cur_in2[idist]); + r2 = c_re(cur_in2[2 * idist]); + i2 = c_im(cur_in2[2 * idist]); + r3 = c_re(cur_in2[3 * idist]); + i3 = c_im(cur_in2[3 * idist]); + c_re(cur_buffers[0]) = r0; + c_im(cur_buffers[0]) = i0; + c_re(cur_buffers[nb]) = r1; + c_im(cur_buffers[nb]) = i1; + c_re(cur_buffers[2 * nb]) = r2; + c_im(cur_buffers[2 * nb]) = i2; + c_re(cur_buffers[3 * nb]) = r3; + c_im(cur_buffers[3 * nb]) = i3; + cur_buffers += 4 * nb; + cur_in2 += 4 * idist; + } + for (; buf < nbuffers; ++buf) { + *cur_buffers = *cur_in2; 
+ cur_buffers += nb; + cur_in2 += idist; + } + } + + /* + * Now, compute the FFTs in the buffers (in-place + * using work): + */ + fftw(p, nbuffers, buffers, 1, nb, work, 1, 0); + + /* + * Finally, copy the results back from the contiguous + * buffers to the strided arrays (writing consecutive + * locations): + */ + for (j = 0; j < n; ++j) { + fftw_complex *cur_in2 = cur_in + j * istride; + fftw_complex *cur_buffers = buffers + j; + + for (buf = 0; buf <= nbuffers - 4; buf += 4) { + fftw_real r0, i0, r1, i1, r2, i2, r3, i3; + r0 = c_re(cur_buffers[0]); + i0 = c_im(cur_buffers[0]); + r1 = c_re(cur_buffers[nb]); + i1 = c_im(cur_buffers[nb]); + r2 = c_re(cur_buffers[2 * nb]); + i2 = c_im(cur_buffers[2 * nb]); + r3 = c_re(cur_buffers[3 * nb]); + i3 = c_im(cur_buffers[3 * nb]); + c_re(cur_in2[0]) = r0; + c_im(cur_in2[0]) = i0; + c_re(cur_in2[idist]) = r1; + c_im(cur_in2[idist]) = i1; + c_re(cur_in2[2 * idist]) = r2; + c_im(cur_in2[2 * idist]) = i2; + c_re(cur_in2[3 * idist]) = r3; + c_im(cur_in2[3 * idist]) = i3; + cur_buffers += 4 * nb; + cur_in2 += 4 * idist; + } + for (; buf < nbuffers; ++buf) { + *cur_in2 = *cur_buffers; + cur_buffers += nb; + cur_in2 += idist; + } + } + } + + /* + * we skip howmany % nbuffers ffts at the end of the loop, + * so we have to go back and do them: + */ + nbuffers = howmany - i; + } while (i < howmany); +} + +/********************* Computing the N-Dimensional FFT *********************/ + +void fftwnd_aux(fftwnd_plan p, int cur_dim, + fftw_complex *in, int istride, + fftw_complex *out, int ostride, + fftw_complex *work) +{ + int n_after = p->n_after[cur_dim], n = p->n[cur_dim]; + + if (cur_dim == p->rank - 2) { + /* just do the last dimension directly: */ + if (p->is_in_place) + fftw(p->plans[p->rank - 1], n, + in, istride, n_after * istride, + work, 1, 0); + else + fftw(p->plans[p->rank - 1], n, + in, istride, n_after * istride, + out, ostride, n_after * ostride); + } else { /* we have at least two dimensions to go */ + int i; + + /* + * 
process the subsequent dimensions recursively, in hyperslabs, + * to get maximum locality: + */ + for (i = 0; i < n; ++i) + fftwnd_aux(p, cur_dim + 1, + in + i * n_after * istride, istride, + out + i * n_after * ostride, ostride, work); + } + + /* do the current dimension (in-place): */ + if (p->nbuffers == 0) { + fftw(p->plans[cur_dim], n_after, + out, n_after * ostride, ostride, + work, 1, 0); + } else /* using contiguous copy buffers: */ + fftw_buffered(p->plans[cur_dim], n_after, + out, n_after * ostride, ostride, + work, p->nbuffers, work + n); +} + +/* + * alternate version of fftwnd_aux -- this version pushes the howmany + * loop down to the leaves of the computation, for greater locality in + * cases where dist < stride + */ +void fftwnd_aux_howmany(fftwnd_plan p, int cur_dim, + int howmany, + fftw_complex *in, int istride, int idist, + fftw_complex *out, int ostride, int odist, + fftw_complex *work) +{ + int n_after = p->n_after[cur_dim], n = p->n[cur_dim]; + int k; + + if (cur_dim == p->rank - 2) { + /* just do the last dimension directly: */ + if (p->is_in_place) + for (k = 0; k < n; ++k) + fftw(p->plans[p->rank - 1], howmany, + in + k * n_after * istride, istride, idist, + work, 1, 0); + else + for (k = 0; k < n; ++k) + fftw(p->plans[p->rank - 1], howmany, + in + k * n_after * istride, istride, idist, + out + k * n_after * ostride, ostride, odist); + } else { /* we have at least two dimensions to go */ + int i; + + /* + * process the subsequent dimensions recursively, in + * hyperslabs, to get maximum locality: + */ + for (i = 0; i < n; ++i) + fftwnd_aux_howmany(p, cur_dim + 1, howmany, + in + i * n_after * istride, istride, idist, + out + i * n_after * ostride, ostride, odist, + work); + } + + /* do the current dimension (in-place): */ + if (p->nbuffers == 0) + for (k = 0; k < n_after; ++k) + fftw(p->plans[cur_dim], howmany, + out + k * ostride, n_after * ostride, odist, + work, 1, 0); + else /* using contiguous copy buffers: */ + for (k = 0; k < 
n_after; ++k) + fftw_buffered(p->plans[cur_dim], howmany, + out + k * ostride, n_after * ostride, odist, + work, p->nbuffers, work + n); +} + +void fftwnd(fftwnd_plan p, int howmany, + fftw_complex *in, int istride, int idist, + fftw_complex *out, int ostride, int odist) +{ + fftw_complex *work; + +#ifdef FFTW_DEBUG + if (p->rank > 0 && (p->plans[0]->flags & FFTW_THREADSAFE) + && p->nwork && p->work) + fftw_die("bug with FFTW_THREADSAFE flag\n"); +#endif + + if (p->nwork && !p->work) + work = (fftw_complex *) fftw_malloc(p->nwork * sizeof(fftw_complex)); + else + work = p->work; + + switch (p->rank) { + case 0: + break; + case 1: + if (p->is_in_place) /* fft is in-place */ + fftw(p->plans[0], howmany, in, istride, idist, + work, 1, 0); + else + fftw(p->plans[0], howmany, in, istride, idist, + out, ostride, odist); + break; + default: /* rank >= 2 */ + { + if (p->is_in_place) { + out = in; + ostride = istride; + odist = idist; + } + if (howmany > 1 && odist < ostride) + fftwnd_aux_howmany(p, 0, howmany, + in, istride, idist, + out, ostride, odist, + work); + else { + int i; + + for (i = 0; i < howmany; ++i) + fftwnd_aux(p, 0, + in + i * idist, istride, + out + i * odist, ostride, + work); + } + } + } + + if (p->nwork && !p->work) + fftw_free(work); + +} + +void fftwnd_one(fftwnd_plan p, fftw_complex *in, fftw_complex *out) +{ + fftwnd(p, 1, in, 1, 1, out, 1, 1); +} diff --git a/src/fftw/fn_1.c b/src/fftw/fn_1.c new file mode 100644 index 0000000..e0b6d03 --- /dev/null +++ b/src/fftw/fn_1.c @@ -0,0 +1,62 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Mon Mar 24 02:05:37 EST 2003 */ + +#include "fftw-int.h" +#include "fftw.h" + +/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddle 1 */ + +/* + * This function contains 0 FP additions, 0 FP multiplications, + * (or, 0 additions, 0 multiplications, 0 fused multiply/add), + * 2 stack variables, and 4 memory accesses + */ + +/* + * Generator Id's : + * $Id: fn_1.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fn_1.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fn_1.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + */ + +void fftw_no_twiddle_1(const fftw_complex *input, fftw_complex *output, + int istride, int ostride) +{ + fftw_real tmp1; + fftw_real tmp2; + ASSERT_ALIGNED_DOUBLE; + tmp1 = c_re(input[0]); + c_re(output[0]) = tmp1; + tmp2 = c_im(input[0]); + c_im(output[0]) = tmp2; +} + +fftw_codelet_desc fftw_no_twiddle_1_desc = { + "fftw_no_twiddle_1", + (void (*)()) fftw_no_twiddle_1, + 1, + FFTW_FORWARD, + FFTW_NOTW, + 23, + 0, + (const int *) 0, +}; diff --git a/src/fftw/fn_10.c b/src/fftw/fn_10.c new file mode 100644 index 0000000..8010fb6 --- /dev/null +++ b/src/fftw/fn_10.c @@ -0,0 +1,285 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free 
Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Mon Mar 24 02:05:38 EST 2003 */ + +#include "fftw-int.h" +#include "fftw.h" + +/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddle 10 */ + +/* + * This function contains 84 FP additions, 24 FP multiplications, + * (or, 72 additions, 12 multiplications, 12 fused multiply/add), + * 36 stack variables, and 40 memory accesses + */ +static const fftw_real K250000000 = +FFTW_KONST(+0.250000000000000000000000000000000000000000000); +static const fftw_real K559016994 = +FFTW_KONST(+0.559016994374947424102293417182819058860154590); +static const fftw_real K587785252 = +FFTW_KONST(+0.587785252292473129168705954639072768597652438); +static const fftw_real K951056516 = +FFTW_KONST(+0.951056516295153572116439333379382143405698634); + +/* + * Generator Id's : + * $Id: fn_10.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fn_10.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fn_10.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + */ + +void fftw_no_twiddle_10(const fftw_complex *input, fftw_complex *output, + int istride, int ostride) +{ + fftw_real tmp3; + fftw_real tmp19; + fftw_real tmp64; + fftw_real tmp76; + fftw_real tmp68; + fftw_real tmp69; + fftw_real tmp10; + fftw_real tmp17; + fftw_real tmp18; + fftw_real tmp74; + fftw_real tmp73; + fftw_real 
tmp22; + fftw_real tmp25; + fftw_real tmp26; + fftw_real tmp36; + fftw_real tmp43; + fftw_real tmp59; + fftw_real tmp60; + fftw_real tmp65; + fftw_real tmp52; + fftw_real tmp55; + fftw_real tmp77; + fftw_real tmp78; + fftw_real tmp79; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp1; + fftw_real tmp2; + fftw_real tmp62; + fftw_real tmp63; + ASSERT_ALIGNED_DOUBLE; + tmp1 = c_re(input[0]); + tmp2 = c_re(input[5 * istride]); + tmp3 = tmp1 - tmp2; + tmp19 = tmp1 + tmp2; + tmp62 = c_im(input[0]); + tmp63 = c_im(input[5 * istride]); + tmp64 = tmp62 - tmp63; + tmp76 = tmp62 + tmp63; + } + { + fftw_real tmp6; + fftw_real tmp20; + fftw_real tmp16; + fftw_real tmp24; + fftw_real tmp9; + fftw_real tmp21; + fftw_real tmp13; + fftw_real tmp23; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp4; + fftw_real tmp5; + fftw_real tmp14; + fftw_real tmp15; + ASSERT_ALIGNED_DOUBLE; + tmp4 = c_re(input[2 * istride]); + tmp5 = c_re(input[7 * istride]); + tmp6 = tmp4 - tmp5; + tmp20 = tmp4 + tmp5; + tmp14 = c_re(input[6 * istride]); + tmp15 = c_re(input[istride]); + tmp16 = tmp14 - tmp15; + tmp24 = tmp14 + tmp15; + } + { + fftw_real tmp7; + fftw_real tmp8; + fftw_real tmp11; + fftw_real tmp12; + ASSERT_ALIGNED_DOUBLE; + tmp7 = c_re(input[8 * istride]); + tmp8 = c_re(input[3 * istride]); + tmp9 = tmp7 - tmp8; + tmp21 = tmp7 + tmp8; + tmp11 = c_re(input[4 * istride]); + tmp12 = c_re(input[9 * istride]); + tmp13 = tmp11 - tmp12; + tmp23 = tmp11 + tmp12; + } + tmp68 = tmp6 - tmp9; + tmp69 = tmp13 - tmp16; + tmp10 = tmp6 + tmp9; + tmp17 = tmp13 + tmp16; + tmp18 = tmp10 + tmp17; + tmp74 = tmp20 - tmp21; + tmp73 = tmp23 - tmp24; + tmp22 = tmp20 + tmp21; + tmp25 = tmp23 + tmp24; + tmp26 = tmp22 + tmp25; + } + { + fftw_real tmp32; + fftw_real tmp53; + fftw_real tmp42; + fftw_real tmp51; + fftw_real tmp35; + fftw_real tmp54; + fftw_real tmp39; + fftw_real tmp50; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp30; + fftw_real tmp31; + fftw_real tmp40; + fftw_real tmp41; + ASSERT_ALIGNED_DOUBLE; + tmp30 = 
c_im(input[2 * istride]); + tmp31 = c_im(input[7 * istride]); + tmp32 = tmp30 - tmp31; + tmp53 = tmp30 + tmp31; + tmp40 = c_im(input[6 * istride]); + tmp41 = c_im(input[istride]); + tmp42 = tmp40 - tmp41; + tmp51 = tmp40 + tmp41; + } + { + fftw_real tmp33; + fftw_real tmp34; + fftw_real tmp37; + fftw_real tmp38; + ASSERT_ALIGNED_DOUBLE; + tmp33 = c_im(input[8 * istride]); + tmp34 = c_im(input[3 * istride]); + tmp35 = tmp33 - tmp34; + tmp54 = tmp33 + tmp34; + tmp37 = c_im(input[4 * istride]); + tmp38 = c_im(input[9 * istride]); + tmp39 = tmp37 - tmp38; + tmp50 = tmp37 + tmp38; + } + tmp36 = tmp32 - tmp35; + tmp43 = tmp39 - tmp42; + tmp59 = tmp32 + tmp35; + tmp60 = tmp39 + tmp42; + tmp65 = tmp59 + tmp60; + tmp52 = tmp50 - tmp51; + tmp55 = tmp53 - tmp54; + tmp77 = tmp53 + tmp54; + tmp78 = tmp50 + tmp51; + tmp79 = tmp77 + tmp78; + } + c_re(output[5 * ostride]) = tmp3 + tmp18; + { + fftw_real tmp44; + fftw_real tmp46; + fftw_real tmp29; + fftw_real tmp45; + fftw_real tmp27; + fftw_real tmp28; + ASSERT_ALIGNED_DOUBLE; + tmp44 = (K951056516 * tmp36) + (K587785252 * tmp43); + tmp46 = (K951056516 * tmp43) - (K587785252 * tmp36); + tmp27 = K559016994 * (tmp10 - tmp17); + tmp28 = tmp3 - (K250000000 * tmp18); + tmp29 = tmp27 + tmp28; + tmp45 = tmp28 - tmp27; + c_re(output[9 * ostride]) = tmp29 - tmp44; + c_re(output[ostride]) = tmp29 + tmp44; + c_re(output[7 * ostride]) = tmp45 - tmp46; + c_re(output[3 * ostride]) = tmp45 + tmp46; + } + c_re(output[0]) = tmp19 + tmp26; + { + fftw_real tmp56; + fftw_real tmp58; + fftw_real tmp49; + fftw_real tmp57; + fftw_real tmp47; + fftw_real tmp48; + ASSERT_ALIGNED_DOUBLE; + tmp56 = (K951056516 * tmp52) - (K587785252 * tmp55); + tmp58 = (K951056516 * tmp55) + (K587785252 * tmp52); + tmp47 = tmp19 - (K250000000 * tmp26); + tmp48 = K559016994 * (tmp22 - tmp25); + tmp49 = tmp47 - tmp48; + tmp57 = tmp48 + tmp47; + c_re(output[2 * ostride]) = tmp49 - tmp56; + c_re(output[8 * ostride]) = tmp49 + tmp56; + c_re(output[4 * ostride]) = tmp57 - tmp58; 
+ c_re(output[6 * ostride]) = tmp57 + tmp58; + } + c_im(output[5 * ostride]) = tmp65 + tmp64; + { + fftw_real tmp70; + fftw_real tmp72; + fftw_real tmp67; + fftw_real tmp71; + fftw_real tmp61; + fftw_real tmp66; + ASSERT_ALIGNED_DOUBLE; + tmp70 = (K951056516 * tmp68) + (K587785252 * tmp69); + tmp72 = (K951056516 * tmp69) - (K587785252 * tmp68); + tmp61 = K559016994 * (tmp59 - tmp60); + tmp66 = tmp64 - (K250000000 * tmp65); + tmp67 = tmp61 + tmp66; + tmp71 = tmp66 - tmp61; + c_im(output[ostride]) = tmp67 - tmp70; + c_im(output[9 * ostride]) = tmp70 + tmp67; + c_im(output[3 * ostride]) = tmp71 - tmp72; + c_im(output[7 * ostride]) = tmp72 + tmp71; + } + c_im(output[0]) = tmp79 + tmp76; + { + fftw_real tmp75; + fftw_real tmp83; + fftw_real tmp82; + fftw_real tmp84; + fftw_real tmp80; + fftw_real tmp81; + ASSERT_ALIGNED_DOUBLE; + tmp75 = (K951056516 * tmp73) - (K587785252 * tmp74); + tmp83 = (K951056516 * tmp74) + (K587785252 * tmp73); + tmp80 = tmp76 - (K250000000 * tmp79); + tmp81 = K559016994 * (tmp77 - tmp78); + tmp82 = tmp80 - tmp81; + tmp84 = tmp81 + tmp80; + c_im(output[2 * ostride]) = tmp75 + tmp82; + c_im(output[8 * ostride]) = tmp82 - tmp75; + c_im(output[4 * ostride]) = tmp83 + tmp84; + c_im(output[6 * ostride]) = tmp84 - tmp83; + } +} + +fftw_codelet_desc fftw_no_twiddle_10_desc = { + "fftw_no_twiddle_10", + (void (*)()) fftw_no_twiddle_10, + 10, + FFTW_FORWARD, + FFTW_NOTW, + 221, + 0, + (const int *) 0, +}; diff --git a/src/fftw/fn_11.c b/src/fftw/fn_11.c new file mode 100644 index 0000000..3342319 --- /dev/null +++ b/src/fftw/fn_11.c @@ -0,0 +1,312 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Mon Mar 24 02:05:38 EST 2003 */ + +#include "fftw-int.h" +#include "fftw.h" + +/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddle 11 */ + +/* + * This function contains 140 FP additions, 100 FP multiplications, + * (or, 140 additions, 100 multiplications, 0 fused multiply/add), + * 30 stack variables, and 44 memory accesses + */ +static const fftw_real K142314838 = +FFTW_KONST(+0.142314838273285140443792668616369668791051361); +static const fftw_real K654860733 = +FFTW_KONST(+0.654860733945285064056925072466293553183791199); +static const fftw_real K959492973 = +FFTW_KONST(+0.959492973614497389890368057066327699062454848); +static const fftw_real K415415013 = +FFTW_KONST(+0.415415013001886425529274149229623203524004910); +static const fftw_real K841253532 = +FFTW_KONST(+0.841253532831181168861811648919367717513292498); +static const fftw_real K909631995 = +FFTW_KONST(+0.909631995354518371411715383079028460060241051); +static const fftw_real K281732556 = +FFTW_KONST(+0.281732556841429697711417915346616899035777899); +static const fftw_real K755749574 = +FFTW_KONST(+0.755749574354258283774035843972344420179717445); +static const fftw_real K989821441 = +FFTW_KONST(+0.989821441880932732376092037776718787376519372); +static const fftw_real K540640817 = +FFTW_KONST(+0.540640817455597582107635954318691695431770608); + +/* + * 
Generator Id's : + * $Id: fn_11.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fn_11.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fn_11.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + */ + +void fftw_no_twiddle_11(const fftw_complex *input, fftw_complex *output, + int istride, int ostride) +{ + fftw_real tmp1; + fftw_real tmp50; + fftw_real tmp4; + fftw_real tmp42; + fftw_real tmp20; + fftw_real tmp53; + fftw_real tmp29; + fftw_real tmp49; + fftw_real tmp7; + fftw_real tmp46; + fftw_real tmp10; + fftw_real tmp43; + fftw_real tmp23; + fftw_real tmp52; + fftw_real tmp13; + fftw_real tmp45; + fftw_real tmp32; + fftw_real tmp48; + fftw_real tmp26; + fftw_real tmp51; + fftw_real tmp16; + fftw_real tmp44; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp2; + fftw_real tmp3; + fftw_real tmp18; + fftw_real tmp19; + ASSERT_ALIGNED_DOUBLE; + tmp1 = c_re(input[0]); + tmp50 = c_im(input[0]); + tmp2 = c_re(input[istride]); + tmp3 = c_re(input[10 * istride]); + tmp4 = tmp2 + tmp3; + tmp42 = tmp3 - tmp2; + tmp18 = c_im(input[istride]); + tmp19 = c_im(input[10 * istride]); + tmp20 = tmp18 - tmp19; + tmp53 = tmp18 + tmp19; + { + fftw_real tmp27; + fftw_real tmp28; + fftw_real tmp5; + fftw_real tmp6; + ASSERT_ALIGNED_DOUBLE; + tmp27 = c_im(input[2 * istride]); + tmp28 = c_im(input[9 * istride]); + tmp29 = tmp27 - tmp28; + tmp49 = tmp27 + tmp28; + tmp5 = c_re(input[2 * istride]); + tmp6 = c_re(input[9 * istride]); + tmp7 = tmp5 + tmp6; + tmp46 = tmp6 - tmp5; + } + } + { + fftw_real tmp8; + fftw_real tmp9; + fftw_real tmp30; + fftw_real tmp31; + ASSERT_ALIGNED_DOUBLE; + tmp8 = c_re(input[3 * istride]); + tmp9 = c_re(input[8 * istride]); + tmp10 = tmp8 + tmp9; + tmp43 = tmp9 - tmp8; + { + fftw_real tmp21; + fftw_real tmp22; + fftw_real tmp11; + fftw_real tmp12; + ASSERT_ALIGNED_DOUBLE; + tmp21 = c_im(input[3 * istride]); + tmp22 = c_im(input[8 * istride]); + tmp23 = tmp21 - tmp22; + tmp52 = tmp21 + tmp22; + tmp11 = c_re(input[4 * istride]); + tmp12 = c_re(input[7 * istride]); + tmp13 = 
tmp11 + tmp12; + tmp45 = tmp12 - tmp11; + } + tmp30 = c_im(input[4 * istride]); + tmp31 = c_im(input[7 * istride]); + tmp32 = tmp30 - tmp31; + tmp48 = tmp30 + tmp31; + { + fftw_real tmp24; + fftw_real tmp25; + fftw_real tmp14; + fftw_real tmp15; + ASSERT_ALIGNED_DOUBLE; + tmp24 = c_im(input[5 * istride]); + tmp25 = c_im(input[6 * istride]); + tmp26 = tmp24 - tmp25; + tmp51 = tmp24 + tmp25; + tmp14 = c_re(input[5 * istride]); + tmp15 = c_re(input[6 * istride]); + tmp16 = tmp14 + tmp15; + tmp44 = tmp15 - tmp14; + } + } + { + fftw_real tmp35; + fftw_real tmp34; + fftw_real tmp59; + fftw_real tmp60; + ASSERT_ALIGNED_DOUBLE; + c_re(output[0]) = tmp1 + tmp4 + tmp7 + tmp10 + tmp13 + tmp16; + { + fftw_real tmp41; + fftw_real tmp40; + fftw_real tmp37; + fftw_real tmp36; + ASSERT_ALIGNED_DOUBLE; + tmp41 = + (K540640817 * tmp20) + (K989821441 * tmp23) + + (K755749574 * tmp32) + (K281732556 * tmp26) + + (K909631995 * tmp29); + tmp40 = + tmp1 + (K841253532 * tmp4) + (K415415013 * tmp7) - + (K959492973 * tmp16) - (K654860733 * tmp13) - + (K142314838 * tmp10); + c_re(output[10 * ostride]) = tmp40 - tmp41; + c_re(output[ostride]) = tmp40 + tmp41; + tmp37 = + (K755749574 * tmp20) + (K540640817 * tmp23) + + (K281732556 * tmp32) - (K989821441 * tmp29) - + (K909631995 * tmp26); + tmp36 = + tmp1 + (K841253532 * tmp10) + (K415415013 * tmp16) - + (K959492973 * tmp13) - (K142314838 * tmp7) - + (K654860733 * tmp4); + c_re(output[7 * ostride]) = tmp36 - tmp37; + c_re(output[4 * ostride]) = tmp36 + tmp37; + } + tmp35 = + (K989821441 * tmp20) + (K540640817 * tmp32) + + (K755749574 * tmp26) - (K281732556 * tmp29) - + (K909631995 * tmp23); + tmp34 = + tmp1 + (K415415013 * tmp10) + (K841253532 * tmp13) - + (K654860733 * tmp16) - (K959492973 * tmp7) - + (K142314838 * tmp4); + c_re(output[8 * ostride]) = tmp34 - tmp35; + c_re(output[3 * ostride]) = tmp34 + tmp35; + { + fftw_real tmp39; + fftw_real tmp38; + fftw_real tmp33; + fftw_real tmp17; + ASSERT_ALIGNED_DOUBLE; + tmp39 = + (K909631995 * 
tmp20) + (K755749574 * tmp29) - + (K540640817 * tmp26) - (K989821441 * tmp32) - + (K281732556 * tmp23); + tmp38 = + tmp1 + (K415415013 * tmp4) + (K841253532 * tmp16) - + (K142314838 * tmp13) - (K959492973 * tmp10) - + (K654860733 * tmp7); + c_re(output[9 * ostride]) = tmp38 - tmp39; + c_re(output[2 * ostride]) = tmp38 + tmp39; + tmp33 = + (K281732556 * tmp20) + (K755749574 * tmp23) + + (K989821441 * tmp26) - (K540640817 * tmp29) - + (K909631995 * tmp32); + tmp17 = + tmp1 + (K841253532 * tmp7) + (K415415013 * tmp13) - + (K142314838 * tmp16) - (K654860733 * tmp10) - + (K959492973 * tmp4); + c_re(output[6 * ostride]) = tmp17 - tmp33; + c_re(output[5 * ostride]) = tmp17 + tmp33; + } + c_im(output[0]) = tmp53 + tmp52 + tmp48 + tmp51 + tmp49 + tmp50; + { + fftw_real tmp47; + fftw_real tmp54; + fftw_real tmp57; + fftw_real tmp58; + ASSERT_ALIGNED_DOUBLE; + tmp47 = + (K281732556 * tmp42) + (K755749574 * tmp43) + + (K989821441 * tmp44) - (K909631995 * tmp45) - + (K540640817 * tmp46); + tmp54 = + (K415415013 * tmp48) + (K841253532 * tmp49) + tmp50 - + (K142314838 * tmp51) - (K654860733 * tmp52) - + (K959492973 * tmp53); + c_im(output[5 * ostride]) = tmp47 + tmp54; + c_im(output[6 * ostride]) = tmp54 - tmp47; + tmp57 = + (K540640817 * tmp42) + (K909631995 * tmp46) + + (K989821441 * tmp43) + (K755749574 * tmp45) + + (K281732556 * tmp44); + tmp58 = + (K841253532 * tmp53) + (K415415013 * tmp49) + tmp50 - + (K959492973 * tmp51) - (K654860733 * tmp48) - + (K142314838 * tmp52); + c_im(output[ostride]) = tmp57 + tmp58; + c_im(output[10 * ostride]) = tmp58 - tmp57; + } + tmp59 = + (K909631995 * tmp42) + (K755749574 * tmp46) - + (K540640817 * tmp44) - (K989821441 * tmp45) - + (K281732556 * tmp43); + tmp60 = + (K415415013 * tmp53) + (K841253532 * tmp51) + tmp50 - + (K654860733 * tmp49) - (K142314838 * tmp48) - + (K959492973 * tmp52); + c_im(output[2 * ostride]) = tmp59 + tmp60; + c_im(output[9 * ostride]) = tmp60 - tmp59; + { + fftw_real tmp55; + fftw_real tmp56; + fftw_real tmp61; + 
fftw_real tmp62; + ASSERT_ALIGNED_DOUBLE; + tmp55 = + (K989821441 * tmp42) + (K540640817 * tmp45) + + (K755749574 * tmp44) - (K909631995 * tmp43) - + (K281732556 * tmp46); + tmp56 = + (K415415013 * tmp52) + (K841253532 * tmp48) + tmp50 - + (K959492973 * tmp49) - (K654860733 * tmp51) - + (K142314838 * tmp53); + c_im(output[3 * ostride]) = tmp55 + tmp56; + c_im(output[8 * ostride]) = tmp56 - tmp55; + tmp61 = + (K755749574 * tmp42) + (K540640817 * tmp43) + + (K281732556 * tmp45) - (K909631995 * tmp44) - + (K989821441 * tmp46); + tmp62 = + (K841253532 * tmp52) + (K415415013 * tmp51) + tmp50 - + (K142314838 * tmp49) - (K959492973 * tmp48) - + (K654860733 * tmp53); + c_im(output[4 * ostride]) = tmp61 + tmp62; + c_im(output[7 * ostride]) = tmp62 - tmp61; + } + } +} + +fftw_codelet_desc fftw_no_twiddle_11_desc = { + "fftw_no_twiddle_11", + (void (*)()) fftw_no_twiddle_11, + 11, + FFTW_FORWARD, + FFTW_NOTW, + 243, + 0, + (const int *) 0, +}; diff --git a/src/fftw/fn_12.c b/src/fftw/fn_12.c new file mode 100644 index 0000000..1a149ba --- /dev/null +++ b/src/fftw/fn_12.c @@ -0,0 +1,324 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Mon Mar 24 02:05:40 EST 2003 */ + +#include "fftw-int.h" +#include "fftw.h" + +/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddle 12 */ + +/* + * This function contains 96 FP additions, 16 FP multiplications, + * (or, 88 additions, 8 multiplications, 8 fused multiply/add), + * 40 stack variables, and 48 memory accesses + */ +static const fftw_real K866025403 = +FFTW_KONST(+0.866025403784438646763723170752936183471402627); +static const fftw_real K500000000 = +FFTW_KONST(+0.500000000000000000000000000000000000000000000); + +/* + * Generator Id's : + * $Id: fn_12.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fn_12.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fn_12.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + */ + +void fftw_no_twiddle_12(const fftw_complex *input, fftw_complex *output, + int istride, int ostride) +{ + fftw_real tmp5; + fftw_real tmp53; + fftw_real tmp36; + fftw_real tmp28; + fftw_real tmp35; + fftw_real tmp54; + fftw_real tmp10; + fftw_real tmp56; + fftw_real tmp39; + fftw_real tmp33; + fftw_real tmp38; + fftw_real tmp57; + fftw_real tmp16; + fftw_real tmp42; + fftw_real tmp72; + fftw_real tmp45; + fftw_real tmp92; + fftw_real tmp75; + fftw_real tmp21; + fftw_real tmp47; + fftw_real tmp77; + fftw_real tmp50; + fftw_real tmp93; + fftw_real tmp80; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp1; + fftw_real tmp2; + fftw_real tmp3; + fftw_real tmp4; + ASSERT_ALIGNED_DOUBLE; + tmp1 = c_re(input[0]); + tmp2 = c_re(input[4 * istride]); + tmp3 = c_re(input[8 * istride]); + tmp4 = tmp2 + tmp3; + tmp5 = tmp1 + tmp4; + tmp53 = tmp1 - (K500000000 * tmp4); + tmp36 = 
K866025403 * (tmp3 - tmp2); + } + { + fftw_real tmp24; + fftw_real tmp25; + fftw_real tmp26; + fftw_real tmp27; + ASSERT_ALIGNED_DOUBLE; + tmp24 = c_im(input[0]); + tmp25 = c_im(input[4 * istride]); + tmp26 = c_im(input[8 * istride]); + tmp27 = tmp25 + tmp26; + tmp28 = tmp24 + tmp27; + tmp35 = tmp24 - (K500000000 * tmp27); + tmp54 = K866025403 * (tmp25 - tmp26); + } + { + fftw_real tmp6; + fftw_real tmp7; + fftw_real tmp8; + fftw_real tmp9; + ASSERT_ALIGNED_DOUBLE; + tmp6 = c_re(input[6 * istride]); + tmp7 = c_re(input[10 * istride]); + tmp8 = c_re(input[2 * istride]); + tmp9 = tmp7 + tmp8; + tmp10 = tmp6 + tmp9; + tmp56 = tmp6 - (K500000000 * tmp9); + tmp39 = K866025403 * (tmp8 - tmp7); + } + { + fftw_real tmp29; + fftw_real tmp30; + fftw_real tmp31; + fftw_real tmp32; + ASSERT_ALIGNED_DOUBLE; + tmp29 = c_im(input[6 * istride]); + tmp30 = c_im(input[10 * istride]); + tmp31 = c_im(input[2 * istride]); + tmp32 = tmp30 + tmp31; + tmp33 = tmp29 + tmp32; + tmp38 = tmp29 - (K500000000 * tmp32); + tmp57 = K866025403 * (tmp30 - tmp31); + } + { + fftw_real tmp12; + fftw_real tmp13; + fftw_real tmp14; + fftw_real tmp15; + ASSERT_ALIGNED_DOUBLE; + tmp12 = c_re(input[3 * istride]); + tmp13 = c_re(input[7 * istride]); + tmp14 = c_re(input[11 * istride]); + tmp15 = tmp13 + tmp14; + tmp16 = tmp12 + tmp15; + tmp42 = tmp12 - (K500000000 * tmp15); + tmp72 = K866025403 * (tmp14 - tmp13); + } + { + fftw_real tmp73; + fftw_real tmp43; + fftw_real tmp44; + fftw_real tmp74; + ASSERT_ALIGNED_DOUBLE; + tmp73 = c_im(input[3 * istride]); + tmp43 = c_im(input[7 * istride]); + tmp44 = c_im(input[11 * istride]); + tmp74 = tmp43 + tmp44; + tmp45 = K866025403 * (tmp43 - tmp44); + tmp92 = tmp73 + tmp74; + tmp75 = tmp73 - (K500000000 * tmp74); + } + { + fftw_real tmp17; + fftw_real tmp18; + fftw_real tmp19; + fftw_real tmp20; + ASSERT_ALIGNED_DOUBLE; + tmp17 = c_re(input[9 * istride]); + tmp18 = c_re(input[istride]); + tmp19 = c_re(input[5 * istride]); + tmp20 = tmp18 + tmp19; + tmp21 = tmp17 + 
tmp20; + tmp47 = tmp17 - (K500000000 * tmp20); + tmp77 = K866025403 * (tmp19 - tmp18); + } + { + fftw_real tmp78; + fftw_real tmp48; + fftw_real tmp49; + fftw_real tmp79; + ASSERT_ALIGNED_DOUBLE; + tmp78 = c_im(input[9 * istride]); + tmp48 = c_im(input[istride]); + tmp49 = c_im(input[5 * istride]); + tmp79 = tmp48 + tmp49; + tmp50 = K866025403 * (tmp48 - tmp49); + tmp93 = tmp78 + tmp79; + tmp80 = tmp78 - (K500000000 * tmp79); + } + { + fftw_real tmp11; + fftw_real tmp22; + fftw_real tmp23; + fftw_real tmp34; + ASSERT_ALIGNED_DOUBLE; + tmp11 = tmp5 + tmp10; + tmp22 = tmp16 + tmp21; + c_re(output[6 * ostride]) = tmp11 - tmp22; + c_re(output[0]) = tmp11 + tmp22; + { + fftw_real tmp91; + fftw_real tmp94; + fftw_real tmp95; + fftw_real tmp96; + ASSERT_ALIGNED_DOUBLE; + tmp91 = tmp5 - tmp10; + tmp94 = tmp92 - tmp93; + c_re(output[3 * ostride]) = tmp91 - tmp94; + c_re(output[9 * ostride]) = tmp91 + tmp94; + tmp95 = tmp28 + tmp33; + tmp96 = tmp92 + tmp93; + c_im(output[6 * ostride]) = tmp95 - tmp96; + c_im(output[0]) = tmp95 + tmp96; + } + tmp23 = tmp16 - tmp21; + tmp34 = tmp28 - tmp33; + c_im(output[3 * ostride]) = tmp23 + tmp34; + c_im(output[9 * ostride]) = tmp34 - tmp23; + { + fftw_real tmp63; + fftw_real tmp83; + fftw_real tmp82; + fftw_real tmp84; + fftw_real tmp66; + fftw_real tmp70; + fftw_real tmp69; + fftw_real tmp71; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp61; + fftw_real tmp62; + fftw_real tmp76; + fftw_real tmp81; + ASSERT_ALIGNED_DOUBLE; + tmp61 = tmp36 + tmp35; + tmp62 = tmp39 + tmp38; + tmp63 = tmp61 - tmp62; + tmp83 = tmp61 + tmp62; + tmp76 = tmp72 + tmp75; + tmp81 = tmp77 + tmp80; + tmp82 = tmp76 - tmp81; + tmp84 = tmp76 + tmp81; + } + { + fftw_real tmp64; + fftw_real tmp65; + fftw_real tmp67; + fftw_real tmp68; + ASSERT_ALIGNED_DOUBLE; + tmp64 = tmp42 + tmp45; + tmp65 = tmp47 + tmp50; + tmp66 = tmp64 - tmp65; + tmp70 = tmp64 + tmp65; + tmp67 = tmp53 + tmp54; + tmp68 = tmp56 + tmp57; + tmp69 = tmp67 + tmp68; + tmp71 = tmp67 - tmp68; + } + 
c_im(output[ostride]) = tmp63 - tmp66; + c_im(output[7 * ostride]) = tmp63 + tmp66; + c_re(output[10 * ostride]) = tmp69 - tmp70; + c_re(output[4 * ostride]) = tmp69 + tmp70; + c_re(output[7 * ostride]) = tmp71 - tmp82; + c_re(output[ostride]) = tmp71 + tmp82; + c_im(output[10 * ostride]) = tmp83 - tmp84; + c_im(output[4 * ostride]) = tmp83 + tmp84; + } + { + fftw_real tmp41; + fftw_real tmp89; + fftw_real tmp88; + fftw_real tmp90; + fftw_real tmp52; + fftw_real tmp60; + fftw_real tmp59; + fftw_real tmp85; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp37; + fftw_real tmp40; + fftw_real tmp86; + fftw_real tmp87; + ASSERT_ALIGNED_DOUBLE; + tmp37 = tmp35 - tmp36; + tmp40 = tmp38 - tmp39; + tmp41 = tmp37 - tmp40; + tmp89 = tmp37 + tmp40; + tmp86 = tmp75 - tmp72; + tmp87 = tmp80 - tmp77; + tmp88 = tmp86 - tmp87; + tmp90 = tmp86 + tmp87; + } + { + fftw_real tmp46; + fftw_real tmp51; + fftw_real tmp55; + fftw_real tmp58; + ASSERT_ALIGNED_DOUBLE; + tmp46 = tmp42 - tmp45; + tmp51 = tmp47 - tmp50; + tmp52 = tmp46 - tmp51; + tmp60 = tmp46 + tmp51; + tmp55 = tmp53 - tmp54; + tmp58 = tmp56 - tmp57; + tmp59 = tmp55 + tmp58; + tmp85 = tmp55 - tmp58; + } + c_im(output[5 * ostride]) = tmp41 - tmp52; + c_im(output[11 * ostride]) = tmp41 + tmp52; + c_re(output[2 * ostride]) = tmp59 - tmp60; + c_re(output[8 * ostride]) = tmp59 + tmp60; + c_re(output[11 * ostride]) = tmp85 - tmp88; + c_re(output[5 * ostride]) = tmp85 + tmp88; + c_im(output[2 * ostride]) = tmp89 - tmp90; + c_im(output[8 * ostride]) = tmp89 + tmp90; + } + } +} + +fftw_codelet_desc fftw_no_twiddle_12_desc = { + "fftw_no_twiddle_12", + (void (*)()) fftw_no_twiddle_12, + 12, + FFTW_FORWARD, + FFTW_NOTW, + 265, + 0, + (const int *) 0, +}; diff --git a/src/fftw/fn_13.c b/src/fftw/fn_13.c new file mode 100644 index 0000000..7b41c37 --- /dev/null +++ b/src/fftw/fn_13.c @@ -0,0 +1,546 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or 
modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Mon Mar 24 02:05:40 EST 2003 */ + +#include "fftw-int.h" +#include "fftw.h" + +/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddle 13 */ + +/* + * This function contains 176 FP additions, 68 FP multiplications, + * (or, 138 additions, 30 multiplications, 38 fused multiply/add), + * 50 stack variables, and 52 memory accesses + */ +static const fftw_real K1_732050807 = +FFTW_KONST(+1.732050807568877293527446341505872366942805254); +static const fftw_real K256247671 = +FFTW_KONST(+0.256247671582936600958684654061725059144125175); +static const fftw_real K156891391 = +FFTW_KONST(+0.156891391051584611046832726756003269660212636); +static const fftw_real K300238635 = +FFTW_KONST(+0.300238635966332641462884626667381504676006424); +static const fftw_real K011599105 = +FFTW_KONST(+0.011599105605768290721655456654083252189827041); +static const fftw_real K174138601 = +FFTW_KONST(+0.174138601152135905005660794929264742616964676); +static const fftw_real K575140729 = +FFTW_KONST(+0.575140729474003121368385547455453388461001608); +static const fftw_real K2_000000000 = +FFTW_KONST(+2.000000000000000000000000000000000000000000000); +static const fftw_real
K083333333 = +FFTW_KONST(+0.083333333333333333333333333333333333333333333); +static const fftw_real K075902986 = +FFTW_KONST(+0.075902986037193865983102897245103540356428373); +static const fftw_real K251768516 = +FFTW_KONST(+0.251768516431883313623436926934233488546674281); +static const fftw_real K258260390 = +FFTW_KONST(+0.258260390311744861420450644284508567852516811); +static const fftw_real K132983124 = +FFTW_KONST(+0.132983124607418643793760531921092974399165133); +static const fftw_real K265966249 = +FFTW_KONST(+0.265966249214837287587521063842185948798330267); +static const fftw_real K387390585 = +FFTW_KONST(+0.387390585467617292130675966426762851778775217); +static const fftw_real K503537032 = +FFTW_KONST(+0.503537032863766627246873853868466977093348562); +static const fftw_real K113854479 = +FFTW_KONST(+0.113854479055790798974654345867655310534642560); +static const fftw_real K300462606 = +FFTW_KONST(+0.300462606288665774426601772289207995520941381); +static const fftw_real K866025403 = +FFTW_KONST(+0.866025403784438646763723170752936183471402627); +static const fftw_real K500000000 = +FFTW_KONST(+0.500000000000000000000000000000000000000000000); + +/* + * Generator Id's : + * $Id: fn_13.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fn_13.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fn_13.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + */ + /* + * fftw_no_twiddle_13 -- generated codelet of size 13 (genfft -notwiddle 13, + * see the Generated-by comment earlier in this file). + * + * input: read at input[k * istride], k = 0..12, through c_re()/c_im(); + * the pointer is const and is never written through. + * output: written at output[k * ostride], k = 0..12. + * + * output[0] receives the plain sum of all 13 inputs. All input loads are + * issued in the first two inner blocks, before the first store to output. + * fftw_no_twiddle_13_desc (after the function) registers this routine with + * FFTW_FORWARD and FFTW_NOTW. + */ +void fftw_no_twiddle_13(const fftw_complex *input, fftw_complex *output, + int istride, int ostride) +{ + fftw_real tmp1; + fftw_real tmp146; + fftw_real tmp115; + fftw_real tmp120; + fftw_real tmp125; + fftw_real tmp31; + fftw_real tmp40; + fftw_real tmp116; + fftw_real tmp24; + fftw_real tmp43; + fftw_real tmp36; + fftw_real tmp41; + fftw_real tmp123; + fftw_real tmp126; + fftw_real tmp56; + fftw_real tmp141; + fftw_real tmp147; + fftw_real tmp134; + fftw_real tmp143; + fftw_real tmp67; + fftw_real tmp73; + fftw_real tmp82; + fftw_real tmp137; + fftw_real tmp144; + fftw_real tmp80; + fftw_real tmp83; +
ASSERT_ALIGNED_DOUBLE; + tmp1 = c_re(input[0]); + tmp146 = c_im(input[0]); + { /* loads and combines the real parts of input[1..12] */ + fftw_real tmp15; + fftw_real tmp113; + fftw_real tmp18; + fftw_real tmp29; + fftw_real tmp21; + fftw_real tmp28; + fftw_real tmp22; + fftw_real tmp114; + fftw_real tmp6; + fftw_real tmp32; + fftw_real tmp25; + fftw_real tmp11; + fftw_real tmp33; + fftw_real tmp26; + fftw_real tmp13; + fftw_real tmp14; + fftw_real tmp12; + fftw_real tmp23; + ASSERT_ALIGNED_DOUBLE; + tmp13 = c_re(input[8 * istride]); + tmp14 = c_re(input[5 * istride]); + tmp15 = tmp13 + tmp14; + tmp113 = tmp13 - tmp14; + { + fftw_real tmp16; + fftw_real tmp17; + fftw_real tmp19; + fftw_real tmp20; + ASSERT_ALIGNED_DOUBLE; + tmp16 = c_re(input[6 * istride]); + tmp17 = c_re(input[11 * istride]); + tmp18 = tmp16 + tmp17; + tmp29 = tmp16 - tmp17; + tmp19 = c_re(input[2 * istride]); + tmp20 = c_re(input[7 * istride]); + tmp21 = tmp19 + tmp20; + tmp28 = tmp19 - tmp20; + } + tmp22 = tmp18 + tmp21; + tmp114 = tmp29 + tmp28; + { + fftw_real tmp2; + fftw_real tmp3; + fftw_real tmp4; + fftw_real tmp5; + ASSERT_ALIGNED_DOUBLE; + tmp2 = c_re(input[istride]); + tmp3 = c_re(input[3 * istride]); + tmp4 = c_re(input[9 * istride]); + tmp5 = tmp3 + tmp4; + tmp6 = tmp2 + tmp5; + tmp32 = tmp2 - (K500000000 * tmp5); + tmp25 = tmp3 - tmp4; + } + { + fftw_real tmp7; + fftw_real tmp8; + fftw_real tmp9; + fftw_real tmp10; + ASSERT_ALIGNED_DOUBLE; + tmp7 = c_re(input[12 * istride]); + tmp8 = c_re(input[4 * istride]); + tmp9 = c_re(input[10 * istride]); + tmp10 = tmp8 + tmp9; + tmp11 = tmp7 + tmp10; + tmp33 = tmp7 - (K500000000 * tmp10); + tmp26 = tmp8 - tmp9; + } + tmp115 = tmp113 - tmp114; + { + fftw_real tmp118; + fftw_real tmp119; + fftw_real tmp27; + fftw_real tmp30; + ASSERT_ALIGNED_DOUBLE; + tmp118 = tmp113 + (K500000000 * tmp114); + tmp119 = K866025403 * (tmp25 + tmp26); + tmp120 = tmp118 - tmp119; + tmp125 = tmp119 + tmp118; + tmp27 = tmp25 - tmp26; + tmp30 = tmp28 - tmp29; + tmp31 = tmp27 + tmp30; + tmp40 = tmp30 - tmp27; + } + tmp116 =
tmp6 - tmp11; + tmp12 = tmp6 + tmp11; + tmp23 = tmp15 + tmp22; + tmp24 = tmp12 + tmp23; + tmp43 = K300462606 * (tmp12 - tmp23); + { + fftw_real tmp34; + fftw_real tmp35; + fftw_real tmp121; + fftw_real tmp122; + ASSERT_ALIGNED_DOUBLE; + tmp34 = tmp32 + tmp33; + tmp35 = tmp15 - (K500000000 * tmp22); + tmp36 = tmp34 + tmp35; + tmp41 = tmp34 - tmp35; + tmp121 = tmp32 - tmp33; + tmp122 = K866025403 * (tmp18 - tmp21); + tmp123 = tmp121 + tmp122; + tmp126 = tmp121 - tmp122; + } + } + { /* loads and combines the imaginary parts of input[1..12] */ + fftw_real tmp59; + fftw_real tmp131; + fftw_real tmp62; + fftw_real tmp78; + fftw_real tmp65; + fftw_real tmp77; + fftw_real tmp66; + fftw_real tmp132; + fftw_real tmp50; + fftw_real tmp74; + fftw_real tmp69; + fftw_real tmp55; + fftw_real tmp75; + fftw_real tmp70; + fftw_real tmp57; + fftw_real tmp58; + fftw_real tmp71; + fftw_real tmp72; + ASSERT_ALIGNED_DOUBLE; + tmp57 = c_im(input[8 * istride]); + tmp58 = c_im(input[5 * istride]); + tmp59 = tmp57 - tmp58; + tmp131 = tmp57 + tmp58; + { + fftw_real tmp60; + fftw_real tmp61; + fftw_real tmp63; + fftw_real tmp64; + ASSERT_ALIGNED_DOUBLE; + tmp60 = c_im(input[6 * istride]); + tmp61 = c_im(input[11 * istride]); + tmp62 = tmp60 - tmp61; + tmp78 = tmp60 + tmp61; + tmp63 = c_im(input[2 * istride]); + tmp64 = c_im(input[7 * istride]); + tmp65 = tmp63 - tmp64; + tmp77 = tmp63 + tmp64; + } + tmp66 = tmp62 + tmp65; + tmp132 = tmp78 + tmp77; + { + fftw_real tmp46; + fftw_real tmp47; + fftw_real tmp48; + fftw_real tmp49; + ASSERT_ALIGNED_DOUBLE; + tmp46 = c_im(input[istride]); + tmp47 = c_im(input[3 * istride]); + tmp48 = c_im(input[9 * istride]); + tmp49 = tmp47 + tmp48; + tmp50 = tmp46 + tmp49; + tmp74 = tmp46 - (K500000000 * tmp49); + tmp69 = tmp47 - tmp48; + } + { + fftw_real tmp51; + fftw_real tmp52; + fftw_real tmp53; + fftw_real tmp54; + ASSERT_ALIGNED_DOUBLE; + tmp51 = c_im(input[12 * istride]); + tmp52 = c_im(input[4 * istride]); + tmp53 = c_im(input[10 * istride]); + tmp54 = tmp52 + tmp53; + tmp55 = tmp51 + tmp54; + tmp75 = tmp51 -
(K500000000 * tmp54); + tmp70 = tmp52 - tmp53; + } + tmp56 = tmp50 - tmp55; + { + fftw_real tmp139; + fftw_real tmp140; + fftw_real tmp130; + fftw_real tmp133; + ASSERT_ALIGNED_DOUBLE; + tmp139 = tmp50 + tmp55; + tmp140 = tmp131 + tmp132; + tmp141 = K300462606 * (tmp139 - tmp140); + tmp147 = tmp139 + tmp140; + tmp130 = tmp74 + tmp75; + tmp133 = tmp131 - (K500000000 * tmp132); + tmp134 = tmp130 - tmp133; + tmp143 = tmp130 + tmp133; + } + tmp67 = tmp59 - tmp66; + tmp71 = K866025403 * (tmp69 + tmp70); + tmp72 = tmp59 + (K500000000 * tmp66); + tmp73 = tmp71 - tmp72; + tmp82 = tmp71 + tmp72; + { + fftw_real tmp135; + fftw_real tmp136; + fftw_real tmp76; + fftw_real tmp79; + ASSERT_ALIGNED_DOUBLE; + tmp135 = tmp62 - tmp65; + tmp136 = tmp70 - tmp69; + tmp137 = tmp135 - tmp136; + tmp144 = tmp136 + tmp135; + tmp76 = tmp74 - tmp75; + tmp79 = K866025403 * (tmp77 - tmp78); + tmp80 = tmp76 - tmp79; + tmp83 = tmp76 + tmp79; + } + } + c_re(output[0]) = tmp1 + tmp24; /* sum of the real parts of all 13 inputs */ + { /* computes and stores the remaining real outputs */ + fftw_real tmp99; + fftw_real tmp109; + fftw_real tmp39; + fftw_real tmp105; + fftw_real tmp89; + fftw_real tmp44; + fftw_real tmp68; + fftw_real tmp85; + fftw_real tmp96; + fftw_real tmp108; + fftw_real tmp90; + fftw_real tmp93; + fftw_real tmp94; + fftw_real tmp106; + fftw_real tmp97; + fftw_real tmp98; + ASSERT_ALIGNED_DOUBLE; + tmp97 = (K113854479 * tmp31) - (K503537032 * tmp36); + tmp98 = (K387390585 * tmp40) - (K265966249 * tmp41); + tmp99 = tmp97 - tmp98; + tmp109 = tmp98 + tmp97; + { + fftw_real tmp42; + fftw_real tmp87; + fftw_real tmp37; + fftw_real tmp38; + fftw_real tmp88; + ASSERT_ALIGNED_DOUBLE; + tmp42 = (K132983124 * tmp40) + (K258260390 * tmp41); + tmp87 = tmp43 - tmp42; + tmp37 = (K251768516 * tmp31) + (K075902986 * tmp36); + tmp38 = tmp1 - (K083333333 * tmp24); + tmp88 = tmp38 - tmp37; + tmp39 = (K2_000000000 * tmp37) + tmp38; + tmp105 = tmp88 - tmp87; + tmp89 = tmp87 + tmp88; + tmp44 = (K2_000000000 * tmp42) + tmp43; + } + { + fftw_real tmp81; + fftw_real tmp84; + fftw_real tmp91; + fftw_real
tmp92; + ASSERT_ALIGNED_DOUBLE; + tmp68 = (K575140729 * tmp56) + (K174138601 * tmp67); + tmp81 = (K011599105 * tmp73) + (K300238635 * tmp80); + tmp84 = (K156891391 * tmp82) - (K256247671 * tmp83); + tmp85 = tmp81 + tmp84; + tmp96 = K1_732050807 * (tmp84 - tmp81); + tmp108 = tmp85 - tmp68; + tmp90 = (K174138601 * tmp56) - (K575140729 * tmp67); + tmp91 = (K300238635 * tmp73) - (K011599105 * tmp80); + tmp92 = (K256247671 * tmp82) + (K156891391 * tmp83); + tmp93 = tmp91 + tmp92; + tmp94 = tmp90 - tmp93; + tmp106 = K1_732050807 * (tmp92 - tmp91); + } + { + fftw_real tmp45; + fftw_real tmp86; + fftw_real tmp101; + fftw_real tmp102; + ASSERT_ALIGNED_DOUBLE; + tmp45 = tmp39 - tmp44; + tmp86 = tmp68 + (K2_000000000 * tmp85); + c_re(output[8 * ostride]) = tmp45 - tmp86; + c_re(output[5 * ostride]) = tmp45 + tmp86; + { + fftw_real tmp103; + fftw_real tmp104; + fftw_real tmp95; + fftw_real tmp100; + ASSERT_ALIGNED_DOUBLE; + tmp103 = tmp44 + tmp39; + tmp104 = tmp90 + (K2_000000000 * tmp93); + c_re(output[12 * ostride]) = tmp103 - tmp104; + c_re(output[ostride]) = tmp103 + tmp104; + tmp95 = tmp89 - tmp94; + tmp100 = tmp96 - tmp99; + c_re(output[4 * ostride]) = tmp95 - tmp100; + c_re(output[10 * ostride]) = tmp100 + tmp95; + } + tmp101 = tmp89 + tmp94; + tmp102 = tmp99 + tmp96; + c_re(output[3 * ostride]) = tmp101 - tmp102; + c_re(output[9 * ostride]) = tmp102 + tmp101; + { + fftw_real tmp111; + fftw_real tmp112; + fftw_real tmp107; + fftw_real tmp110; + ASSERT_ALIGNED_DOUBLE; + tmp111 = tmp105 + tmp106; + tmp112 = tmp109 + tmp108; + c_re(output[2 * ostride]) = tmp111 - tmp112; + c_re(output[7 * ostride]) = tmp112 + tmp111; + tmp107 = tmp105 - tmp106; + tmp110 = tmp108 - tmp109; + c_re(output[6 * ostride]) = tmp107 - tmp110; + c_re(output[11 * ostride]) = tmp110 + tmp107; + } + } + } + c_im(output[0]) = tmp147 + tmp146; /* sum of the imaginary parts of all 13 inputs */ + { /* computes and stores the remaining imaginary outputs */ + fftw_real tmp160; + fftw_real tmp173; + fftw_real tmp142; + fftw_real tmp170; + fftw_real tmp164; + fftw_real tmp149; + fftw_real tmp117; + fftw_real
tmp128; + fftw_real tmp165; + fftw_real tmp169; + fftw_real tmp152; + fftw_real tmp155; + fftw_real tmp157; + fftw_real tmp172; + fftw_real tmp158; + fftw_real tmp159; + ASSERT_ALIGNED_DOUBLE; + tmp158 = (K387390585 * tmp137) + (K265966249 * tmp134); + tmp159 = (K113854479 * tmp144) + (K503537032 * tmp143); + tmp160 = tmp158 + tmp159; + tmp173 = tmp158 - tmp159; + { + fftw_real tmp138; + fftw_real tmp163; + fftw_real tmp145; + fftw_real tmp148; + fftw_real tmp162; + ASSERT_ALIGNED_DOUBLE; + tmp138 = (K258260390 * tmp134) - (K132983124 * tmp137); + tmp163 = tmp141 - tmp138; + tmp145 = (K075902986 * tmp143) - (K251768516 * tmp144); + tmp148 = tmp146 - (K083333333 * tmp147); + tmp162 = tmp148 - tmp145; + tmp142 = (K2_000000000 * tmp138) + tmp141; + tmp170 = tmp163 + tmp162; + tmp164 = tmp162 - tmp163; + tmp149 = (K2_000000000 * tmp145) + tmp148; + } + { + fftw_real tmp124; + fftw_real tmp127; + fftw_real tmp153; + fftw_real tmp154; + ASSERT_ALIGNED_DOUBLE; + tmp117 = (K575140729 * tmp115) - (K174138601 * tmp116); + tmp124 = (K300238635 * tmp120) + (K011599105 * tmp123); + tmp127 = (K256247671 * tmp125) + (K156891391 * tmp126); + tmp128 = tmp124 - tmp127; + tmp165 = K1_732050807 * (tmp127 + tmp124); + tmp169 = tmp117 - tmp128; + tmp152 = (K575140729 * tmp116) + (K174138601 * tmp115); + tmp153 = (K156891391 * tmp125) - (K256247671 * tmp126); + tmp154 = (K300238635 * tmp123) - (K011599105 * tmp120); + tmp155 = tmp153 + tmp154; + tmp157 = tmp155 - tmp152; + tmp172 = K1_732050807 * (tmp154 - tmp153); + } + { + fftw_real tmp129; + fftw_real tmp150; + fftw_real tmp167; + fftw_real tmp168; + ASSERT_ALIGNED_DOUBLE; + tmp129 = tmp117 + (K2_000000000 * tmp128); + tmp150 = tmp142 + tmp149; + c_im(output[ostride]) = tmp129 + tmp150; + c_im(output[12 * ostride]) = tmp150 - tmp129; + { + fftw_real tmp151; + fftw_real tmp156; + fftw_real tmp161; + fftw_real tmp166; + ASSERT_ALIGNED_DOUBLE; + tmp151 = tmp149 - tmp142; + tmp156 = tmp152 + (K2_000000000 * tmp155); + c_im(output[5 *
ostride]) = tmp151 - tmp156; + c_im(output[8 * ostride]) = tmp156 + tmp151; + tmp161 = tmp157 + tmp160; + tmp166 = tmp164 - tmp165; + c_im(output[2 * ostride]) = tmp161 + tmp166; + c_im(output[7 * ostride]) = tmp166 - tmp161; + } + tmp167 = tmp165 + tmp164; + tmp168 = tmp160 - tmp157; + c_im(output[6 * ostride]) = tmp167 - tmp168; + c_im(output[11 * ostride]) = tmp168 + tmp167; + { + fftw_real tmp175; + fftw_real tmp176; + fftw_real tmp171; + fftw_real tmp174; + ASSERT_ALIGNED_DOUBLE; + tmp175 = tmp170 - tmp169; + tmp176 = tmp172 - tmp173; + c_im(output[4 * ostride]) = tmp175 - tmp176; + c_im(output[10 * ostride]) = tmp176 + tmp175; + tmp171 = tmp169 + tmp170; + tmp174 = tmp172 + tmp173; + c_im(output[3 * ostride]) = tmp171 - tmp174; + c_im(output[9 * ostride]) = tmp174 + tmp171; + } + } + } +} + /* + * Descriptor for the fftw_no_twiddle_13 codelet. The initializer lists, in + * order: the name string, the function pointer (cast to void (*)()), 13, + * FFTW_FORWARD, FFTW_NOTW, 287, 0 and a null (const int *). + * NOTE(review): the field names are declared with fftw_codelet_desc in + * fftw-int.h, which is not visible here -- presumably size, direction, node + * type, signature, twiddle count and twiddle order; confirm against the header. + */ +fftw_codelet_desc fftw_no_twiddle_13_desc = { + "fftw_no_twiddle_13", + (void (*)()) fftw_no_twiddle_13, + 13, + FFTW_FORWARD, + FFTW_NOTW, + 287, + 0, + (const int *) 0, +}; diff --git a/src/fftw/fn_14.c b/src/fftw/fn_14.c new file mode 100644 index 0000000..faf1bd8 --- /dev/null +++ b/src/fftw/fn_14.c @@ -0,0 +1,409 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Mon Mar 24 02:05:41 EST 2003 */ + +#include "fftw-int.h" +#include "fftw.h" + +/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddle 14 */ + +/* + * This function contains 148 FP additions, 72 FP multiplications, + * (or, 148 additions, 72 multiplications, 0 fused multiply/add), + * 36 stack variables, and 56 memory accesses + */ +static const fftw_real K900968867 = +FFTW_KONST(+0.900968867902419126236102319507445051165919162); +static const fftw_real K222520933 = +FFTW_KONST(+0.222520933956314404288902564496794759466355569); +static const fftw_real K623489801 = +FFTW_KONST(+0.623489801858733530525004884004239810632274731); +static const fftw_real K781831482 = +FFTW_KONST(+0.781831482468029808708444526674057750232334519); +static const fftw_real K974927912 = +FFTW_KONST(+0.974927912181823607018131682993931217232785801); +static const fftw_real K433883739 = +FFTW_KONST(+0.433883739117558120475768332848358754609990728); + +/* + * Generator Id's : + * $Id: fn_14.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fn_14.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fn_14.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + */ + /* + * fftw_no_twiddle_14 -- generated codelet of size 14 (genfft -notwiddle 14). + * + * input: read at input[k * istride], k = 0..13, through c_re()/c_im(); + * the pointer is const and is never written through. + * output: written at output[k * ostride], k = 0..13. + * + * The first seven inner blocks load the inputs in pairs (k, k + 7 mod 14) and + * keep the sum and the difference of each pair. Odd-indexed outputs are then + * built from the differences and even-indexed outputs from the sums. All + * loads precede the first store to output. + * fftw_no_twiddle_14_desc (after the function) registers this routine with + * FFTW_FORWARD and FFTW_NOTW. + */ +void fftw_no_twiddle_14(const fftw_complex *input, fftw_complex *output, + int istride, int ostride) +{ + fftw_real tmp3; + fftw_real tmp25; + fftw_real tmp84; + fftw_real tmp93; + fftw_real tmp10; + fftw_real tmp77; + fftw_real tmp28; + fftw_real tmp97; + fftw_real tmp42; + fftw_real tmp86; + fftw_real tmp65; + fftw_real tmp92; + fftw_real tmp17; + fftw_real tmp79; + fftw_real tmp31; + fftw_real tmp99; + fftw_real tmp56; + fftw_real tmp81; + fftw_real tmp68; +
fftw_real tmp94; + fftw_real tmp24; + fftw_real tmp78; + fftw_real tmp34; + fftw_real tmp98; + fftw_real tmp49; + fftw_real tmp85; + fftw_real tmp71; + fftw_real tmp95; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp1; + fftw_real tmp2; + fftw_real tmp82; + fftw_real tmp83; + ASSERT_ALIGNED_DOUBLE; + tmp1 = c_re(input[0]); + tmp2 = c_re(input[7 * istride]); + tmp3 = tmp1 - tmp2; + tmp25 = tmp1 + tmp2; + tmp82 = c_im(input[0]); + tmp83 = c_im(input[7 * istride]); + tmp84 = tmp82 - tmp83; + tmp93 = tmp82 + tmp83; + } + { + fftw_real tmp6; + fftw_real tmp26; + fftw_real tmp9; + fftw_real tmp27; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp4; + fftw_real tmp5; + fftw_real tmp7; + fftw_real tmp8; + ASSERT_ALIGNED_DOUBLE; + tmp4 = c_re(input[2 * istride]); + tmp5 = c_re(input[9 * istride]); + tmp6 = tmp4 - tmp5; + tmp26 = tmp4 + tmp5; + tmp7 = c_re(input[12 * istride]); + tmp8 = c_re(input[5 * istride]); + tmp9 = tmp7 - tmp8; + tmp27 = tmp7 + tmp8; + } + tmp10 = tmp6 + tmp9; + tmp77 = tmp9 - tmp6; + tmp28 = tmp26 + tmp27; + tmp97 = tmp27 - tmp26; + } + { + fftw_real tmp38; + fftw_real tmp63; + fftw_real tmp41; + fftw_real tmp64; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp36; + fftw_real tmp37; + fftw_real tmp39; + fftw_real tmp40; + ASSERT_ALIGNED_DOUBLE; + tmp36 = c_im(input[2 * istride]); + tmp37 = c_im(input[9 * istride]); + tmp38 = tmp36 - tmp37; + tmp63 = tmp36 + tmp37; + tmp39 = c_im(input[12 * istride]); + tmp40 = c_im(input[5 * istride]); + tmp41 = tmp39 - tmp40; + tmp64 = tmp39 + tmp40; + } + tmp42 = tmp38 - tmp41; + tmp86 = tmp38 + tmp41; + tmp65 = tmp63 - tmp64; + tmp92 = tmp63 + tmp64; + } + { + fftw_real tmp13; + fftw_real tmp29; + fftw_real tmp16; + fftw_real tmp30; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp11; + fftw_real tmp12; + fftw_real tmp14; + fftw_real tmp15; + ASSERT_ALIGNED_DOUBLE; + tmp11 = c_re(input[4 * istride]); + tmp12 = c_re(input[11 * istride]); + tmp13 = tmp11 - tmp12; + tmp29 = tmp11 + tmp12; + tmp14 = c_re(input[10 * istride]); + tmp15 =
c_re(input[3 * istride]); + tmp16 = tmp14 - tmp15; + tmp30 = tmp14 + tmp15; + } + tmp17 = tmp13 + tmp16; + tmp79 = tmp16 - tmp13; + tmp31 = tmp29 + tmp30; + tmp99 = tmp29 - tmp30; + } + { + fftw_real tmp52; + fftw_real tmp67; + fftw_real tmp55; + fftw_real tmp66; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp50; + fftw_real tmp51; + fftw_real tmp53; + fftw_real tmp54; + ASSERT_ALIGNED_DOUBLE; + tmp50 = c_im(input[4 * istride]); + tmp51 = c_im(input[11 * istride]); + tmp52 = tmp50 - tmp51; + tmp67 = tmp50 + tmp51; + tmp53 = c_im(input[10 * istride]); + tmp54 = c_im(input[3 * istride]); + tmp55 = tmp53 - tmp54; + tmp66 = tmp53 + tmp54; + } + tmp56 = tmp52 - tmp55; + tmp81 = tmp52 + tmp55; + tmp68 = tmp66 - tmp67; + tmp94 = tmp67 + tmp66; + } + { + fftw_real tmp20; + fftw_real tmp32; + fftw_real tmp23; + fftw_real tmp33; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp18; + fftw_real tmp19; + fftw_real tmp21; + fftw_real tmp22; + ASSERT_ALIGNED_DOUBLE; + tmp18 = c_re(input[6 * istride]); + tmp19 = c_re(input[13 * istride]); + tmp20 = tmp18 - tmp19; + tmp32 = tmp18 + tmp19; + tmp21 = c_re(input[8 * istride]); + tmp22 = c_re(input[istride]); + tmp23 = tmp21 - tmp22; + tmp33 = tmp21 + tmp22; + } + tmp24 = tmp20 + tmp23; + tmp78 = tmp23 - tmp20; + tmp34 = tmp32 + tmp33; + tmp98 = tmp32 - tmp33; + } + { + fftw_real tmp45; + fftw_real tmp70; + fftw_real tmp48; + fftw_real tmp69; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp43; + fftw_real tmp44; + fftw_real tmp46; + fftw_real tmp47; + ASSERT_ALIGNED_DOUBLE; + tmp43 = c_im(input[6 * istride]); + tmp44 = c_im(input[13 * istride]); + tmp45 = tmp43 - tmp44; + tmp70 = tmp43 + tmp44; + tmp46 = c_im(input[8 * istride]); + tmp47 = c_im(input[istride]); + tmp48 = tmp46 - tmp47; + tmp69 = tmp46 + tmp47; + } + tmp49 = tmp45 - tmp48; + tmp85 = tmp45 + tmp48; + tmp71 = tmp69 - tmp70; + tmp95 = tmp70 + tmp69; + } + { + fftw_real tmp57; + fftw_real tmp35; + fftw_real tmp72; + fftw_real tmp62; + ASSERT_ALIGNED_DOUBLE; + c_re(output[7 * ostride])
= tmp3 + tmp10 + tmp17 + tmp24; /* even-indexed minus odd-indexed real inputs */ + tmp57 = + (K433883739 * tmp42) + (K974927912 * tmp49) - + (K781831482 * tmp56); + tmp35 = + tmp3 + (K623489801 * tmp17) - (K222520933 * tmp24) - + (K900968867 * tmp10); + c_re(output[11 * ostride]) = tmp35 - tmp57; + c_re(output[3 * ostride]) = tmp35 + tmp57; + { + fftw_real tmp59; + fftw_real tmp58; + fftw_real tmp61; + fftw_real tmp60; + ASSERT_ALIGNED_DOUBLE; + tmp59 = + (K974927912 * tmp42) - (K433883739 * tmp56) - + (K781831482 * tmp49); + tmp58 = + tmp3 + (K623489801 * tmp24) - (K900968867 * tmp17) - + (K222520933 * tmp10); + c_re(output[5 * ostride]) = tmp58 - tmp59; + c_re(output[9 * ostride]) = tmp58 + tmp59; + tmp61 = + (K781831482 * tmp42) + (K433883739 * tmp49) + + (K974927912 * tmp56); + tmp60 = + tmp3 + (K623489801 * tmp10) - (K900968867 * tmp24) - + (K222520933 * tmp17); + c_re(output[13 * ostride]) = tmp60 - tmp61; + c_re(output[ostride]) = tmp60 + tmp61; + } + c_re(output[0]) = tmp25 + tmp28 + tmp31 + tmp34; /* sum of the real parts of all 14 inputs */ + tmp72 = + (K781831482 * tmp65) - (K974927912 * tmp68) - + (K433883739 * tmp71); + tmp62 = + tmp25 + (K623489801 * tmp28) - (K900968867 * tmp34) - + (K222520933 * tmp31); + c_re(output[6 * ostride]) = tmp62 - tmp72; + c_re(output[8 * ostride]) = tmp62 + tmp72; + { + fftw_real tmp74; + fftw_real tmp73; + fftw_real tmp76; + fftw_real tmp75; + ASSERT_ALIGNED_DOUBLE; + tmp74 = + (K433883739 * tmp65) + (K781831482 * tmp68) - + (K974927912 * tmp71); + tmp73 = + tmp25 + (K623489801 * tmp31) - (K222520933 * tmp34) - + (K900968867 * tmp28); + c_re(output[4 * ostride]) = tmp73 - tmp74; + c_re(output[10 * ostride]) = tmp73 + tmp74; + tmp76 = + (K974927912 * tmp65) + (K781831482 * tmp71) + + (K433883739 * tmp68); + tmp75 = + tmp25 + (K623489801 * tmp34) - (K900968867 * tmp31) - + (K222520933 * tmp28); + c_re(output[12 * ostride]) = tmp75 - tmp76; + c_re(output[2 * ostride]) = tmp75 + tmp76; + } + } + { + fftw_real tmp91; + fftw_real tmp90; + fftw_real tmp103; + fftw_real tmp104; + ASSERT_ALIGNED_DOUBLE; + c_im(output[7 *
ostride]) = tmp86 + tmp85 + tmp81 + tmp84; + tmp91 = + (K974927912 * tmp77) - (K781831482 * tmp78) - + (K433883739 * tmp79); + tmp90 = + (K623489801 * tmp85) + tmp84 - (K900968867 * tmp81) - + (K222520933 * tmp86); + c_im(output[5 * ostride]) = tmp90 - tmp91; + c_im(output[9 * ostride]) = tmp91 + tmp90; + { + fftw_real tmp88; + fftw_real tmp89; + fftw_real tmp80; + fftw_real tmp87; + ASSERT_ALIGNED_DOUBLE; + tmp88 = + (K781831482 * tmp77) + (K974927912 * tmp79) + + (K433883739 * tmp78); + tmp89 = + (K623489801 * tmp86) + tmp84 - (K222520933 * tmp81) - + (K900968867 * tmp85); + c_im(output[ostride]) = tmp88 + tmp89; + c_im(output[13 * ostride]) = tmp89 - tmp88; + tmp80 = + (K433883739 * tmp77) + (K974927912 * tmp78) - + (K781831482 * tmp79); + tmp87 = + (K623489801 * tmp81) + tmp84 - (K222520933 * tmp85) - + (K900968867 * tmp86); + c_im(output[3 * ostride]) = tmp80 + tmp87; + c_im(output[11 * ostride]) = tmp87 - tmp80; + } + c_im(output[0]) = tmp92 + tmp95 + tmp94 + tmp93; /* sum of the imaginary parts of all 14 inputs */ + tmp103 = + (K974927912 * tmp97) + (K433883739 * tmp99) + + (K781831482 * tmp98); + tmp104 = + (K623489801 * tmp95) + tmp93 - (K900968867 * tmp94) - + (K222520933 * tmp92); + c_im(output[2 * ostride]) = tmp103 + tmp104; + c_im(output[12 * ostride]) = tmp104 - tmp103; + { + fftw_real tmp100; + fftw_real tmp96; + fftw_real tmp102; + fftw_real tmp101; + ASSERT_ALIGNED_DOUBLE; + tmp100 = + (K781831482 * tmp97) - (K433883739 * tmp98) - + (K974927912 * tmp99); + tmp96 = + (K623489801 * tmp92) + tmp93 - (K222520933 * tmp94) - + (K900968867 * tmp95); + c_im(output[6 * ostride]) = tmp96 - tmp100; + c_im(output[8 * ostride]) = tmp100 + tmp96; + tmp102 = + (K433883739 * tmp97) + (K781831482 * tmp99) - + (K974927912 * tmp98); + tmp101 = + (K623489801 * tmp94) + tmp93 - (K222520933 * tmp95) - + (K900968867 * tmp92); + c_im(output[4 * ostride]) = tmp101 - tmp102; + c_im(output[10 * ostride]) = tmp102 + tmp101; + } + } +} + /* + * Descriptor for the fftw_no_twiddle_14 codelet. The initializer lists, in + * order: the name string, the function pointer (cast to void (*)()), 14, + * FFTW_FORWARD, FFTW_NOTW, 309, 0 and a null (const int *). + * NOTE(review): the field names are declared with fftw_codelet_desc in + * fftw-int.h, which is not visible here -- presumably size, direction, node + * type, signature, twiddle count and twiddle order; confirm against the header. + */ +fftw_codelet_desc fftw_no_twiddle_14_desc = { + "fftw_no_twiddle_14", + (void (*)())
fftw_no_twiddle_14, + 14, + FFTW_FORWARD, + FFTW_NOTW, + 309, + 0, + (const int *) 0, +}; diff --git a/src/fftw/fn_15.c b/src/fftw/fn_15.c new file mode 100644 index 0000000..22eaa43 --- /dev/null +++ b/src/fftw/fn_15.c @@ -0,0 +1,452 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Mon Mar 24 02:05:42 EST 2003 */ + +#include "fftw-int.h" +#include "fftw.h" + +/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddle 15 */ + +/* + * This function contains 156 FP additions, 56 FP multiplications, + * (or, 128 additions, 28 multiplications, 28 fused multiply/add), + * 62 stack variables, and 60 memory accesses + */ +static const fftw_real K587785252 = +FFTW_KONST(+0.587785252292473129168705954639072768597652438); +static const fftw_real K951056516 = +FFTW_KONST(+0.951056516295153572116439333379382143405698634); +static const fftw_real K250000000 = +FFTW_KONST(+0.250000000000000000000000000000000000000000000); +static const fftw_real K559016994 = +FFTW_KONST(+0.559016994374947424102293417182819058860154590); +static const fftw_real K500000000 =
+FFTW_KONST(+0.500000000000000000000000000000000000000000000); +static const fftw_real K866025403 = +FFTW_KONST(+0.866025403784438646763723170752936183471402627); + +/* + * Generator Id's : + * $Id: fn_15.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fn_15.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fn_15.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + */ + +void fftw_no_twiddle_15(const fftw_complex *input, fftw_complex *output, + int istride, int ostride) +{ + fftw_real tmp5; + fftw_real tmp33; + fftw_real tmp57; + fftw_real tmp145; + fftw_real tmp124; + fftw_real tmp136; + fftw_real tmp21; + fftw_real tmp26; + fftw_real tmp27; + fftw_real tmp49; + fftw_real tmp54; + fftw_real tmp55; + fftw_real tmp108; + fftw_real tmp109; + fftw_real tmp147; + fftw_real tmp61; + fftw_real tmp62; + fftw_real tmp63; + fftw_real tmp96; + fftw_real tmp97; + fftw_real tmp138; + fftw_real tmp83; + fftw_real tmp88; + fftw_real tmp118; + fftw_real tmp10; + fftw_real tmp15; + fftw_real tmp16; + fftw_real tmp38; + fftw_real tmp43; + fftw_real tmp44; + fftw_real tmp111; + fftw_real tmp112; + fftw_real tmp146; + fftw_real tmp58; + fftw_real tmp59; + fftw_real tmp60; + fftw_real tmp99; + fftw_real tmp100; + fftw_real tmp137; + fftw_real tmp72; + fftw_real tmp77; + fftw_real tmp117; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp1; + fftw_real tmp121; + fftw_real tmp4; + fftw_real tmp120; + fftw_real tmp32; + fftw_real tmp122; + fftw_real tmp29; + fftw_real tmp123; + ASSERT_ALIGNED_DOUBLE; + tmp1 = c_re(input[0]); + tmp121 = c_im(input[0]); + { + fftw_real tmp2; + fftw_real tmp3; + fftw_real tmp30; + fftw_real tmp31; + ASSERT_ALIGNED_DOUBLE; + tmp2 = c_re(input[5 * istride]); + tmp3 = c_re(input[10 * istride]); + tmp4 = tmp2 + tmp3; + tmp120 = K866025403 * (tmp3 - tmp2); + tmp30 = c_im(input[5 * istride]); + tmp31 = c_im(input[10 * istride]); + tmp32 = K866025403 * (tmp30 - tmp31); + tmp122 = tmp30 + tmp31; + } + tmp5 = tmp1 + tmp4; + tmp29 = tmp1 - (K500000000 * tmp4); + tmp33 = tmp29 - 
tmp32; + tmp57 = tmp29 + tmp32; + tmp145 = tmp122 + tmp121; + tmp123 = tmp121 - (K500000000 * tmp122); + tmp124 = tmp120 + tmp123; + tmp136 = tmp123 - tmp120; + } + { + fftw_real tmp17; + fftw_real tmp20; + fftw_real tmp45; + fftw_real tmp79; + fftw_real tmp80; + fftw_real tmp81; + fftw_real tmp48; + fftw_real tmp82; + fftw_real tmp22; + fftw_real tmp25; + fftw_real tmp50; + fftw_real tmp84; + fftw_real tmp85; + fftw_real tmp86; + fftw_real tmp53; + fftw_real tmp87; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp18; + fftw_real tmp19; + fftw_real tmp46; + fftw_real tmp47; + ASSERT_ALIGNED_DOUBLE; + tmp17 = c_re(input[6 * istride]); + tmp18 = c_re(input[11 * istride]); + tmp19 = c_re(input[istride]); + tmp20 = tmp18 + tmp19; + tmp45 = tmp17 - (K500000000 * tmp20); + tmp79 = K866025403 * (tmp19 - tmp18); + tmp80 = c_im(input[6 * istride]); + tmp46 = c_im(input[11 * istride]); + tmp47 = c_im(input[istride]); + tmp81 = tmp46 + tmp47; + tmp48 = K866025403 * (tmp46 - tmp47); + tmp82 = tmp80 - (K500000000 * tmp81); + } + { + fftw_real tmp23; + fftw_real tmp24; + fftw_real tmp51; + fftw_real tmp52; + ASSERT_ALIGNED_DOUBLE; + tmp22 = c_re(input[9 * istride]); + tmp23 = c_re(input[14 * istride]); + tmp24 = c_re(input[4 * istride]); + tmp25 = tmp23 + tmp24; + tmp50 = tmp22 - (K500000000 * tmp25); + tmp84 = K866025403 * (tmp24 - tmp23); + tmp85 = c_im(input[9 * istride]); + tmp51 = c_im(input[14 * istride]); + tmp52 = c_im(input[4 * istride]); + tmp86 = tmp51 + tmp52; + tmp53 = K866025403 * (tmp51 - tmp52); + tmp87 = tmp85 - (K500000000 * tmp86); + } + tmp21 = tmp17 + tmp20; + tmp26 = tmp22 + tmp25; + tmp27 = tmp21 + tmp26; + tmp49 = tmp45 - tmp48; + tmp54 = tmp50 - tmp53; + tmp55 = tmp49 + tmp54; + tmp108 = tmp81 + tmp80; + tmp109 = tmp86 + tmp85; + tmp147 = tmp108 + tmp109; + tmp61 = tmp45 + tmp48; + tmp62 = tmp50 + tmp53; + tmp63 = tmp61 + tmp62; + tmp96 = tmp82 - tmp79; + tmp97 = tmp87 - tmp84; + tmp138 = tmp96 + tmp97; + tmp83 = tmp79 + tmp82; + tmp88 = tmp84 + tmp87; + 
tmp118 = tmp83 + tmp88; + } + { + fftw_real tmp6; + fftw_real tmp9; + fftw_real tmp34; + fftw_real tmp68; + fftw_real tmp69; + fftw_real tmp70; + fftw_real tmp37; + fftw_real tmp71; + fftw_real tmp11; + fftw_real tmp14; + fftw_real tmp39; + fftw_real tmp73; + fftw_real tmp74; + fftw_real tmp75; + fftw_real tmp42; + fftw_real tmp76; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp7; + fftw_real tmp8; + fftw_real tmp35; + fftw_real tmp36; + ASSERT_ALIGNED_DOUBLE; + tmp6 = c_re(input[3 * istride]); + tmp7 = c_re(input[8 * istride]); + tmp8 = c_re(input[13 * istride]); + tmp9 = tmp7 + tmp8; + tmp34 = tmp6 - (K500000000 * tmp9); + tmp68 = K866025403 * (tmp8 - tmp7); + tmp69 = c_im(input[3 * istride]); + tmp35 = c_im(input[8 * istride]); + tmp36 = c_im(input[13 * istride]); + tmp70 = tmp35 + tmp36; + tmp37 = K866025403 * (tmp35 - tmp36); + tmp71 = tmp69 - (K500000000 * tmp70); + } + { + fftw_real tmp12; + fftw_real tmp13; + fftw_real tmp40; + fftw_real tmp41; + ASSERT_ALIGNED_DOUBLE; + tmp11 = c_re(input[12 * istride]); + tmp12 = c_re(input[2 * istride]); + tmp13 = c_re(input[7 * istride]); + tmp14 = tmp12 + tmp13; + tmp39 = tmp11 - (K500000000 * tmp14); + tmp73 = K866025403 * (tmp13 - tmp12); + tmp74 = c_im(input[12 * istride]); + tmp40 = c_im(input[2 * istride]); + tmp41 = c_im(input[7 * istride]); + tmp75 = tmp40 + tmp41; + tmp42 = K866025403 * (tmp40 - tmp41); + tmp76 = tmp74 - (K500000000 * tmp75); + } + tmp10 = tmp6 + tmp9; + tmp15 = tmp11 + tmp14; + tmp16 = tmp10 + tmp15; + tmp38 = tmp34 - tmp37; + tmp43 = tmp39 - tmp42; + tmp44 = tmp38 + tmp43; + tmp111 = tmp70 + tmp69; + tmp112 = tmp75 + tmp74; + tmp146 = tmp111 + tmp112; + tmp58 = tmp34 + tmp37; + tmp59 = tmp39 + tmp42; + tmp60 = tmp58 + tmp59; + tmp99 = tmp71 - tmp68; + tmp100 = tmp76 - tmp73; + tmp137 = tmp99 + tmp100; + tmp72 = tmp68 + tmp71; + tmp77 = tmp73 + tmp76; + tmp117 = tmp72 + tmp77; + } + { + fftw_real tmp106; + fftw_real tmp28; + fftw_real tmp105; + fftw_real tmp114; + fftw_real tmp116; + fftw_real 
tmp110; + fftw_real tmp113; + fftw_real tmp115; + fftw_real tmp107; + ASSERT_ALIGNED_DOUBLE; + tmp106 = K559016994 * (tmp16 - tmp27); + tmp28 = tmp16 + tmp27; + tmp105 = tmp5 - (K250000000 * tmp28); + tmp110 = tmp108 - tmp109; + tmp113 = tmp111 - tmp112; + tmp114 = (K951056516 * tmp110) - (K587785252 * tmp113); + tmp116 = (K951056516 * tmp113) + (K587785252 * tmp110); + c_re(output[0]) = tmp5 + tmp28; + tmp115 = tmp106 + tmp105; + c_re(output[9 * ostride]) = tmp115 - tmp116; + c_re(output[6 * ostride]) = tmp115 + tmp116; + tmp107 = tmp105 - tmp106; + c_re(output[12 * ostride]) = tmp107 - tmp114; + c_re(output[3 * ostride]) = tmp107 + tmp114; + } + { + fftw_real tmp94; + fftw_real tmp56; + fftw_real tmp93; + fftw_real tmp102; + fftw_real tmp104; + fftw_real tmp98; + fftw_real tmp101; + fftw_real tmp103; + fftw_real tmp95; + ASSERT_ALIGNED_DOUBLE; + tmp94 = K559016994 * (tmp44 - tmp55); + tmp56 = tmp44 + tmp55; + tmp93 = tmp33 - (K250000000 * tmp56); + tmp98 = tmp96 - tmp97; + tmp101 = tmp99 - tmp100; + tmp102 = (K951056516 * tmp98) - (K587785252 * tmp101); + tmp104 = (K951056516 * tmp101) + (K587785252 * tmp98); + c_re(output[5 * ostride]) = tmp33 + tmp56; + tmp103 = tmp94 + tmp93; + c_re(output[14 * ostride]) = tmp103 - tmp104; + c_re(output[11 * ostride]) = tmp103 + tmp104; + tmp95 = tmp93 - tmp94; + c_re(output[2 * ostride]) = tmp95 - tmp102; + c_re(output[8 * ostride]) = tmp95 + tmp102; + } + { + fftw_real tmp150; + fftw_real tmp148; + fftw_real tmp149; + fftw_real tmp154; + fftw_real tmp156; + fftw_real tmp152; + fftw_real tmp153; + fftw_real tmp155; + fftw_real tmp151; + ASSERT_ALIGNED_DOUBLE; + tmp150 = K559016994 * (tmp146 - tmp147); + tmp148 = tmp146 + tmp147; + tmp149 = tmp145 - (K250000000 * tmp148); + tmp152 = tmp21 - tmp26; + tmp153 = tmp10 - tmp15; + tmp154 = (K951056516 * tmp152) - (K587785252 * tmp153); + tmp156 = (K951056516 * tmp153) + (K587785252 * tmp152); + c_im(output[0]) = tmp148 + tmp145; + tmp155 = tmp150 + tmp149; + c_im(output[6 * 
ostride]) = tmp155 - tmp156; + c_im(output[9 * ostride]) = tmp156 + tmp155; + tmp151 = tmp149 - tmp150; + c_im(output[3 * ostride]) = tmp151 - tmp154; + c_im(output[12 * ostride]) = tmp154 + tmp151; + } + { + fftw_real tmp141; + fftw_real tmp139; + fftw_real tmp140; + fftw_real tmp135; + fftw_real tmp144; + fftw_real tmp133; + fftw_real tmp134; + fftw_real tmp143; + fftw_real tmp142; + ASSERT_ALIGNED_DOUBLE; + tmp141 = K559016994 * (tmp137 - tmp138); + tmp139 = tmp137 + tmp138; + tmp140 = tmp136 - (K250000000 * tmp139); + tmp133 = tmp49 - tmp54; + tmp134 = tmp38 - tmp43; + tmp135 = (K951056516 * tmp133) - (K587785252 * tmp134); + tmp144 = (K951056516 * tmp134) + (K587785252 * tmp133); + c_im(output[5 * ostride]) = tmp139 + tmp136; + tmp143 = tmp141 + tmp140; + c_im(output[11 * ostride]) = tmp143 - tmp144; + c_im(output[14 * ostride]) = tmp144 + tmp143; + tmp142 = tmp140 - tmp141; + c_im(output[2 * ostride]) = tmp135 + tmp142; + c_im(output[8 * ostride]) = tmp142 - tmp135; + } + { + fftw_real tmp119; + fftw_real tmp125; + fftw_real tmp126; + fftw_real tmp130; + fftw_real tmp131; + fftw_real tmp128; + fftw_real tmp129; + fftw_real tmp132; + fftw_real tmp127; + ASSERT_ALIGNED_DOUBLE; + tmp119 = K559016994 * (tmp117 - tmp118); + tmp125 = tmp117 + tmp118; + tmp126 = tmp124 - (K250000000 * tmp125); + tmp128 = tmp58 - tmp59; + tmp129 = tmp61 - tmp62; + tmp130 = (K951056516 * tmp128) + (K587785252 * tmp129); + tmp131 = (K951056516 * tmp129) - (K587785252 * tmp128); + c_im(output[10 * ostride]) = tmp125 + tmp124; + tmp132 = tmp126 - tmp119; + c_im(output[7 * ostride]) = tmp131 + tmp132; + c_im(output[13 * ostride]) = tmp132 - tmp131; + tmp127 = tmp119 + tmp126; + c_im(output[ostride]) = tmp127 - tmp130; + c_im(output[4 * ostride]) = tmp130 + tmp127; + } + { + fftw_real tmp65; + fftw_real tmp64; + fftw_real tmp66; + fftw_real tmp90; + fftw_real tmp92; + fftw_real tmp78; + fftw_real tmp89; + fftw_real tmp91; + fftw_real tmp67; + ASSERT_ALIGNED_DOUBLE; + tmp65 = K559016994 * 
(tmp60 - tmp63); + tmp64 = tmp60 + tmp63; + tmp66 = tmp57 - (K250000000 * tmp64); + tmp78 = tmp72 - tmp77; + tmp89 = tmp83 - tmp88; + tmp90 = (K951056516 * tmp78) + (K587785252 * tmp89); + tmp92 = (K951056516 * tmp89) - (K587785252 * tmp78); + c_re(output[10 * ostride]) = tmp57 + tmp64; + tmp91 = tmp66 - tmp65; + c_re(output[7 * ostride]) = tmp91 - tmp92; + c_re(output[13 * ostride]) = tmp91 + tmp92; + tmp67 = tmp65 + tmp66; + c_re(output[4 * ostride]) = tmp67 - tmp90; + c_re(output[ostride]) = tmp67 + tmp90; + } +} + +/* Descriptor object for the fftw_no_twiddle_15 codelet. config.c declares the fftw_no_twiddle_N_desc objects extern and takes their addresses through NOTW_CODELET(x). The initializer supplies, in order: the codelet name string, the function pointer cast to an unprototyped void function pointer, 15, FFTW_FORWARD, FFTW_NOTW, 331, 0 and a null const int pointer. NOTE(review): the field names, and the meaning of 331 and of the trailing 0 and null pointer, are set by the fftw_codelet_desc declaration in fftw-int.h, which is not visible here -- confirm there. */ fftw_codelet_desc fftw_no_twiddle_15_desc = { + "fftw_no_twiddle_15", + (void (*)()) fftw_no_twiddle_15, + 15, + FFTW_FORWARD, + FFTW_NOTW, + 331, + 0, + (const int *) 0, +}; diff --git a/src/fftw/fn_16.c b/src/fftw/fn_16.c new file mode 100644 index 0000000..d75b389 --- /dev/null +++ b/src/fftw/fn_16.c @@ -0,0 +1,442 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Mon Mar 24 02:05:46 EST 2003 */ + +#include "fftw-int.h" +#include "fftw.h" + +/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddle 16 */ + +/* + * This function contains 144 FP additions, 24 FP multiplications, + * (or, 136 additions, 16 multiplications, 8 fused multiply/add), + * 46 stack variables, and 64 memory accesses + */ +/* Constants used as multipliers inside fftw_no_twiddle_16. Numerically K923879532 = cos(pi/8), K382683432 = sin(pi/8) and K707106781 = sqrt(2)/2. FFTW_KONST is a macro defined outside this file. */ static const fftw_real K923879532 = +FFTW_KONST(+0.923879532511286756128183189396788286822416626); +static const fftw_real K382683432 = +FFTW_KONST(+0.382683432365089771728459984030398866761344562); +static const fftw_real K707106781 = +FFTW_KONST(+0.707106781186547524400844362104849039284835938); + +/* + * Generator Id's : + * $Id: fn_16.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fn_16.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fn_16.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + */ + +/* fftw_no_twiddle_16: straight-line codelet of size 16 (generator flag -notwiddle 16; the descriptor after the function lists it with 16, FFTW_FORWARD and FFTW_NOTW). It reads the real and imaginary parts of input[k * istride] for k = 0..15, each exactly once, combines them using additions, subtractions and multiplications by the three K constants, and writes the real and imaginary parts of output[k * ostride] for k = 0..15, each exactly once. There are no loops or branches. The tmpN names are generator-assigned; the locals declared at function scope carry values from the four load blocks to the six store blocks. ASSERT_ALIGNED_DOUBLE is a macro defined outside this file. */ void fftw_no_twiddle_16(const fftw_complex *input, fftw_complex *output, + int istride, int ostride) +{ + fftw_real tmp7; + fftw_real tmp115; + fftw_real tmp38; + fftw_real tmp129; + fftw_real tmp49; + fftw_real tmp95; + fftw_real tmp83; + fftw_real tmp105; + fftw_real tmp29; + fftw_real tmp123; + fftw_real tmp73; + fftw_real tmp101; + fftw_real tmp78; + fftw_real tmp102; + fftw_real tmp126; + fftw_real tmp141; + fftw_real tmp14; + fftw_real tmp130; + fftw_real tmp45; + fftw_real tmp116; + fftw_real tmp52; + fftw_real tmp85; + fftw_real tmp55; + fftw_real tmp84; + fftw_real tmp22; + fftw_real tmp118; + fftw_real tmp62; + fftw_real tmp98; + fftw_real tmp67; + fftw_real tmp99; + fftw_real tmp121; + fftw_real tmp140; + ASSERT_ALIGNED_DOUBLE; + { /* Load block 1: reads inputs 0, 8, 4 and 12 and produces tmp7, tmp115, tmp38, tmp129, tmp49, tmp95, tmp83 and tmp105. */ + fftw_real tmp3; +
fftw_real tmp47; + fftw_real tmp34; + fftw_real tmp82; + fftw_real tmp6; + fftw_real tmp81; + fftw_real tmp37; + fftw_real tmp48; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp1; + fftw_real tmp2; + fftw_real tmp32; + fftw_real tmp33; + ASSERT_ALIGNED_DOUBLE; + tmp1 = c_re(input[0]); + tmp2 = c_re(input[8 * istride]); + tmp3 = tmp1 + tmp2; + tmp47 = tmp1 - tmp2; + tmp32 = c_im(input[0]); + tmp33 = c_im(input[8 * istride]); + tmp34 = tmp32 + tmp33; + tmp82 = tmp32 - tmp33; + } + { + fftw_real tmp4; + fftw_real tmp5; + fftw_real tmp35; + fftw_real tmp36; + ASSERT_ALIGNED_DOUBLE; + tmp4 = c_re(input[4 * istride]); + tmp5 = c_re(input[12 * istride]); + tmp6 = tmp4 + tmp5; + tmp81 = tmp4 - tmp5; + tmp35 = c_im(input[4 * istride]); + tmp36 = c_im(input[12 * istride]); + tmp37 = tmp35 + tmp36; + tmp48 = tmp35 - tmp36; + } + tmp7 = tmp3 + tmp6; + tmp115 = tmp3 - tmp6; + tmp38 = tmp34 + tmp37; + tmp129 = tmp34 - tmp37; + tmp49 = tmp47 - tmp48; + tmp95 = tmp47 + tmp48; + tmp83 = tmp81 + tmp82; + tmp105 = tmp82 - tmp81; + } + { /* Load block 2: reads inputs 15, 7, 3 and 11 and produces tmp29, tmp123, tmp73, tmp101, tmp78, tmp102, tmp126 and tmp141. */ + fftw_real tmp25; + fftw_real tmp69; + fftw_real tmp77; + fftw_real tmp124; + fftw_real tmp28; + fftw_real tmp74; + fftw_real tmp72; + fftw_real tmp125; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp23; + fftw_real tmp24; + fftw_real tmp75; + fftw_real tmp76; + ASSERT_ALIGNED_DOUBLE; + tmp23 = c_re(input[15 * istride]); + tmp24 = c_re(input[7 * istride]); + tmp25 = tmp23 + tmp24; + tmp69 = tmp23 - tmp24; + tmp75 = c_im(input[15 * istride]); + tmp76 = c_im(input[7 * istride]); + tmp77 = tmp75 - tmp76; + tmp124 = tmp75 + tmp76; + } + { + fftw_real tmp26; + fftw_real tmp27; + fftw_real tmp70; + fftw_real tmp71; + ASSERT_ALIGNED_DOUBLE; + tmp26 = c_re(input[3 * istride]); + tmp27 = c_re(input[11 * istride]); + tmp28 = tmp26 + tmp27; + tmp74 = tmp26 - tmp27; + tmp70 = c_im(input[3 * istride]); + tmp71 = c_im(input[11 * istride]); + tmp72 = tmp70 - tmp71; + tmp125 = tmp70 + tmp71; + } + tmp29 = tmp25 + tmp28; + tmp123 = tmp25 - tmp28; + tmp73 = tmp69 - tmp72; +
tmp101 = tmp69 + tmp72; + tmp78 = tmp74 + tmp77; + tmp102 = tmp77 - tmp74; + tmp126 = tmp124 - tmp125; + tmp141 = tmp124 + tmp125; + } + { /* Load block 3: reads inputs 2, 10, 14 and 6 and produces tmp14, tmp130, tmp45, tmp116, tmp52, tmp85, tmp55 and tmp84. */ + fftw_real tmp10; + fftw_real tmp51; + fftw_real tmp41; + fftw_real tmp50; + fftw_real tmp13; + fftw_real tmp53; + fftw_real tmp44; + fftw_real tmp54; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp8; + fftw_real tmp9; + fftw_real tmp39; + fftw_real tmp40; + ASSERT_ALIGNED_DOUBLE; + tmp8 = c_re(input[2 * istride]); + tmp9 = c_re(input[10 * istride]); + tmp10 = tmp8 + tmp9; + tmp51 = tmp8 - tmp9; + tmp39 = c_im(input[2 * istride]); + tmp40 = c_im(input[10 * istride]); + tmp41 = tmp39 + tmp40; + tmp50 = tmp39 - tmp40; + } + { + fftw_real tmp11; + fftw_real tmp12; + fftw_real tmp42; + fftw_real tmp43; + ASSERT_ALIGNED_DOUBLE; + tmp11 = c_re(input[14 * istride]); + tmp12 = c_re(input[6 * istride]); + tmp13 = tmp11 + tmp12; + tmp53 = tmp11 - tmp12; + tmp42 = c_im(input[14 * istride]); + tmp43 = c_im(input[6 * istride]); + tmp44 = tmp42 + tmp43; + tmp54 = tmp42 - tmp43; + } + tmp14 = tmp10 + tmp13; + tmp130 = tmp13 - tmp10; + tmp45 = tmp41 + tmp44; + tmp116 = tmp41 - tmp44; + tmp52 = tmp50 - tmp51; + tmp85 = tmp51 + tmp50; + tmp55 = tmp53 + tmp54; + tmp84 = tmp53 - tmp54; + } + { /* Load block 4: reads inputs 1, 9, 5 and 13 and produces tmp22, tmp118, tmp62, tmp98, tmp67, tmp99, tmp121 and tmp140. */ + fftw_real tmp18; + fftw_real tmp63; + fftw_real tmp61; + fftw_real tmp119; + fftw_real tmp21; + fftw_real tmp58; + fftw_real tmp66; + fftw_real tmp120; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp16; + fftw_real tmp17; + fftw_real tmp59; + fftw_real tmp60; + ASSERT_ALIGNED_DOUBLE; + tmp16 = c_re(input[istride]); + tmp17 = c_re(input[9 * istride]); + tmp18 = tmp16 + tmp17; + tmp63 = tmp16 - tmp17; + tmp59 = c_im(input[istride]); + tmp60 = c_im(input[9 * istride]); + tmp61 = tmp59 - tmp60; + tmp119 = tmp59 + tmp60; + } + { + fftw_real tmp19; + fftw_real tmp20; + fftw_real tmp64; + fftw_real tmp65; + ASSERT_ALIGNED_DOUBLE; + tmp19 = c_re(input[5 * istride]); + tmp20 = c_re(input[13 * istride]); + tmp21 = tmp19 + tmp20; + tmp58 = tmp19 - tmp20; + tmp64 =
c_im(input[5 * istride]); + tmp65 = c_im(input[13 * istride]); + tmp66 = tmp64 - tmp65; + tmp120 = tmp64 + tmp65; + } + tmp22 = tmp18 + tmp21; + tmp118 = tmp18 - tmp21; + tmp62 = tmp58 + tmp61; + tmp98 = tmp61 - tmp58; + tmp67 = tmp63 - tmp66; + tmp99 = tmp63 + tmp66; + tmp121 = tmp119 - tmp120; + tmp140 = tmp119 + tmp120; + } + { /* Store block: writes the real parts of outputs 0 and 8 and the imaginary parts of outputs 4 and 12. */ + fftw_real tmp15; + fftw_real tmp30; + fftw_real tmp31; + fftw_real tmp46; + ASSERT_ALIGNED_DOUBLE; + tmp15 = tmp7 + tmp14; + tmp30 = tmp22 + tmp29; + c_re(output[8 * ostride]) = tmp15 - tmp30; + c_re(output[0]) = tmp15 + tmp30; + tmp31 = tmp29 - tmp22; + tmp46 = tmp38 - tmp45; + c_im(output[4 * ostride]) = tmp31 + tmp46; + c_im(output[12 * ostride]) = tmp46 - tmp31; + } + { /* Store block: writes the imaginary parts of outputs 0 and 8 and the real parts of outputs 4 and 12. */ + fftw_real tmp143; + fftw_real tmp144; + fftw_real tmp139; + fftw_real tmp142; + ASSERT_ALIGNED_DOUBLE; + tmp143 = tmp38 + tmp45; + tmp144 = tmp140 + tmp141; + c_im(output[8 * ostride]) = tmp143 - tmp144; + c_im(output[0]) = tmp143 + tmp144; + tmp139 = tmp7 - tmp14; + tmp142 = tmp140 - tmp141; + c_re(output[12 * ostride]) = tmp139 - tmp142; + c_re(output[4 * ostride]) = tmp139 + tmp142; + } + { /* Store block: writes the real parts of outputs 2 and 10 and the imaginary parts of outputs 6 and 14. */ + fftw_real tmp117; + fftw_real tmp131; + fftw_real tmp128; + fftw_real tmp132; + fftw_real tmp122; + fftw_real tmp127; + ASSERT_ALIGNED_DOUBLE; + tmp117 = tmp115 + tmp116; + tmp131 = tmp129 - tmp130; + tmp122 = tmp118 + tmp121; + tmp127 = tmp123 - tmp126; + tmp128 = K707106781 * (tmp122 + tmp127); + tmp132 = K707106781 * (tmp127 - tmp122); + c_re(output[10 * ostride]) = tmp117 - tmp128; + c_re(output[2 * ostride]) = tmp117 + tmp128; + c_im(output[14 * ostride]) = tmp131 - tmp132; + c_im(output[6 * ostride]) = tmp131 + tmp132; + } + { /* Store block: writes the real parts of outputs 6 and 14 and the imaginary parts of outputs 2 and 10. */ + fftw_real tmp133; + fftw_real tmp137; + fftw_real tmp136; + fftw_real tmp138; + fftw_real tmp134; + fftw_real tmp135; + ASSERT_ALIGNED_DOUBLE; + tmp133 = tmp115 - tmp116; + tmp137 = tmp130 + tmp129; + tmp134 = tmp121 - tmp118; + tmp135 = tmp123 + tmp126; + tmp136 = K707106781 * (tmp134 - tmp135); + tmp138 = K707106781 * (tmp134 + tmp135); +
c_re(output[14 * ostride]) = tmp133 - tmp136; + c_re(output[6 * ostride]) = tmp133 + tmp136; + c_im(output[10 * ostride]) = tmp137 - tmp138; + c_im(output[2 * ostride]) = tmp137 + tmp138; + } + { /* Store block: writes the real and imaginary parts of outputs 3, 7, 11 and 15. */ + fftw_real tmp57; + fftw_real tmp89; + fftw_real tmp92; + fftw_real tmp94; + fftw_real tmp87; + fftw_real tmp93; + fftw_real tmp80; + fftw_real tmp88; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp56; + fftw_real tmp90; + fftw_real tmp91; + fftw_real tmp86; + fftw_real tmp68; + fftw_real tmp79; + ASSERT_ALIGNED_DOUBLE; + tmp56 = K707106781 * (tmp52 - tmp55); + tmp57 = tmp49 + tmp56; + tmp89 = tmp49 - tmp56; + tmp90 = (K382683432 * tmp62) - (K923879532 * tmp67); + tmp91 = (K382683432 * tmp78) + (K923879532 * tmp73); + tmp92 = tmp90 - tmp91; + tmp94 = tmp90 + tmp91; + tmp86 = K707106781 * (tmp84 - tmp85); + tmp87 = tmp83 - tmp86; + tmp93 = tmp83 + tmp86; + tmp68 = (K923879532 * tmp62) + (K382683432 * tmp67); + tmp79 = (K382683432 * tmp73) - (K923879532 * tmp78); + tmp80 = tmp68 + tmp79; + tmp88 = tmp79 - tmp68; + } + c_re(output[11 * ostride]) = tmp57 - tmp80; + c_re(output[3 * ostride]) = tmp57 + tmp80; + c_im(output[15 * ostride]) = tmp87 - tmp88; + c_im(output[7 * ostride]) = tmp87 + tmp88; + c_re(output[15 * ostride]) = tmp89 - tmp92; + c_re(output[7 * ostride]) = tmp89 + tmp92; + c_im(output[11 * ostride]) = tmp93 - tmp94; + c_im(output[3 * ostride]) = tmp93 + tmp94; + } + { /* Store block: writes the real and imaginary parts of outputs 1, 5, 9 and 13. */ + fftw_real tmp97; + fftw_real tmp109; + fftw_real tmp112; + fftw_real tmp114; + fftw_real tmp107; + fftw_real tmp113; + fftw_real tmp104; + fftw_real tmp108; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp96; + fftw_real tmp110; + fftw_real tmp111; + fftw_real tmp106; + fftw_real tmp100; + fftw_real tmp103; + ASSERT_ALIGNED_DOUBLE; + tmp96 = K707106781 * (tmp85 + tmp84); + tmp97 = tmp95 + tmp96; + tmp109 = tmp95 - tmp96; + tmp110 = (K923879532 * tmp98) - (K382683432 * tmp99); + tmp111 = (K923879532 * tmp102) + (K382683432 * tmp101); + tmp112 = tmp110 - tmp111; + tmp114 = tmp110 + tmp111; + tmp106 =
K707106781 * (tmp52 + tmp55); + tmp107 = tmp105 - tmp106; + tmp113 = tmp105 + tmp106; + tmp100 = (K382683432 * tmp98) + (K923879532 * tmp99); + tmp103 = (K923879532 * tmp101) - (K382683432 * tmp102); + tmp104 = tmp100 + tmp103; + tmp108 = tmp103 - tmp100; + } + c_re(output[9 * ostride]) = tmp97 - tmp104; + c_re(output[ostride]) = tmp97 + tmp104; + c_im(output[13 * ostride]) = tmp107 - tmp108; + c_im(output[5 * ostride]) = tmp107 + tmp108; + c_re(output[13 * ostride]) = tmp109 - tmp112; + c_re(output[5 * ostride]) = tmp109 + tmp112; + c_im(output[9 * ostride]) = tmp113 - tmp114; + c_im(output[ostride]) = tmp113 + tmp114; + } +} + +/* Descriptor object for the fftw_no_twiddle_16 codelet. config.c declares the fftw_no_twiddle_N_desc objects extern and takes their addresses through NOTW_CODELET(x). The initializer supplies, in order: the codelet name string, the function pointer cast to an unprototyped void function pointer, 16 (the size given to the generator), FFTW_FORWARD, FFTW_NOTW, 353, 0 and a null const int pointer. NOTE(review): the field names, and the meaning of 353 and of the trailing 0 and null pointer, are set by the fftw_codelet_desc declaration in fftw-int.h, which is not visible here -- confirm there. */ fftw_codelet_desc fftw_no_twiddle_16_desc = { + "fftw_no_twiddle_16", + (void (*)()) fftw_no_twiddle_16, + 16, + FFTW_FORWARD, + FFTW_NOTW, + 353, + 0, + (const int *) 0, +}; diff --git a/src/fftw/fn_2.c b/src/fftw/fn_2.c new file mode 100644 index 0000000..ca45ae3 --- /dev/null +++ b/src/fftw/fn_2.c @@ -0,0 +1,68 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Mon Mar 24 02:05:37 EST 2003 */ + +#include "fftw-int.h" +#include "fftw.h" + +/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddle 2 */ + +/* + * This function contains 4 FP additions, 0 FP multiplications, + * (or, 4 additions, 0 multiplications, 0 fused multiply/add), + * 4 stack variables, and 8 memory accesses + */ + +/* + * Generator Id's : + * $Id: fn_2.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fn_2.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fn_2.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + */ + +/* fftw_no_twiddle_2: straight-line codelet of size 2 (generator flag -notwiddle 2). Separately for the real part (c_re) and the imaginary part (c_im), it stores input[0] + input[istride] into output[0] and input[0] - input[istride] into output[ostride]. istride and ostride are the index steps between the two elements of the input and output arrays. ASSERT_ALIGNED_DOUBLE, c_re and c_im are macros defined outside this file. */ void fftw_no_twiddle_2(const fftw_complex *input, fftw_complex *output, + int istride, int ostride) +{ + fftw_real tmp1; + fftw_real tmp2; + fftw_real tmp3; + fftw_real tmp4; + ASSERT_ALIGNED_DOUBLE; + /* real parts: difference to output[ostride], sum to output[0] */ tmp1 = c_re(input[0]); + tmp2 = c_re(input[istride]); + c_re(output[ostride]) = tmp1 - tmp2; + c_re(output[0]) = tmp1 + tmp2; + /* imaginary parts: same pattern */ tmp3 = c_im(input[0]); + tmp4 = c_im(input[istride]); + c_im(output[ostride]) = tmp3 - tmp4; + c_im(output[0]) = tmp3 + tmp4; +} + +/* Descriptor object for the fftw_no_twiddle_2 codelet. config.c declares it extern and takes its address through NOTW_CODELET(x). The initializer supplies, in order: the codelet name string, the function pointer cast to an unprototyped void function pointer, 2 (the size given to the generator), FFTW_FORWARD, FFTW_NOTW, 45, 0 and a null const int pointer. NOTE(review): the field names, and the meaning of 45 and of the trailing 0 and null pointer, are set by the fftw_codelet_desc declaration in fftw-int.h, which is not visible here -- confirm there. */ fftw_codelet_desc fftw_no_twiddle_2_desc = { + "fftw_no_twiddle_2", + (void (*)()) fftw_no_twiddle_2, + 2, + FFTW_FORWARD, + FFTW_NOTW, + 45, + 0, + (const int *) 0, +}; diff --git a/src/fftw/fn_3.c b/src/fftw/fn_3.c new file mode 100644 index 0000000..4f143d7 --- /dev/null +++ b/src/fftw/fn_3.c @@ -0,0 +1,93 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your
option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Mon Mar 24 02:05:37 EST 2003 */ + +#include "fftw-int.h" +#include "fftw.h" + +/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddle 3 */ + +/* + * This function contains 12 FP additions, 4 FP multiplications, + * (or, 10 additions, 2 multiplications, 2 fused multiply/add), + * 12 stack variables, and 12 memory accesses + */ +/* Constants used as multipliers inside fftw_no_twiddle_3. K500000000 = 1/2 and, numerically, K866025403 = sqrt(3)/2. FFTW_KONST is a macro defined outside this file. */ static const fftw_real K500000000 = +FFTW_KONST(+0.500000000000000000000000000000000000000000000); +static const fftw_real K866025403 = +FFTW_KONST(+0.866025403784438646763723170752936183471402627); + +/* + * Generator Id's : + * $Id: fn_3.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fn_3.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fn_3.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + */ + +/* fftw_no_twiddle_3: straight-line codelet of size 3 (generator flag -notwiddle 3). Writing x0, x1, x2 for input[0], input[istride], input[2 * istride]: output[0] receives x0 + x1 + x2 in both parts. The real parts of output[ostride] and output[2 * ostride] are re(x0) - (re(x1) + re(x2)) / 2 plus and minus K866025403 * (im(x1) - im(x2)). The imaginary parts of output[ostride] and output[2 * ostride] are im(x0) - (im(x1) + im(x2)) / 2 plus and minus K866025403 * (re(x2) - re(x1)). ASSERT_ALIGNED_DOUBLE, c_re and c_im are macros defined outside this file. */ void fftw_no_twiddle_3(const fftw_complex *input, fftw_complex *output, + int istride, int ostride) +{ + fftw_real tmp1; + fftw_real tmp10; + fftw_real tmp4; + fftw_real tmp9; + fftw_real tmp8; + fftw_real tmp11; + fftw_real tmp5; + fftw_real tmp12; + ASSERT_ALIGNED_DOUBLE; + tmp1 = c_re(input[0]); + tmp10 = c_im(input[0]); + { /* Sums (tmp4, tmp11) and scaled differences (tmp9, tmp8) of inputs 1 and 2. */ + fftw_real tmp2; + fftw_real tmp3; + fftw_real tmp6; + fftw_real tmp7; + ASSERT_ALIGNED_DOUBLE; + tmp2 = c_re(input[istride]); + tmp3 = c_re(input[2 * istride]); + tmp4 = tmp2 + tmp3; + tmp9 = K866025403 * (tmp3 - tmp2); + tmp6 = c_im(input[istride]); + tmp7 =
c_im(input[2 * istride]); + tmp8 = K866025403 * (tmp6 - tmp7); + tmp11 = tmp6 + tmp7; + } + /* real parts of the three outputs */ c_re(output[0]) = tmp1 + tmp4; + tmp5 = tmp1 - (K500000000 * tmp4); + c_re(output[2 * ostride]) = tmp5 - tmp8; + c_re(output[ostride]) = tmp5 + tmp8; + /* imaginary parts of the three outputs */ c_im(output[0]) = tmp10 + tmp11; + tmp12 = tmp10 - (K500000000 * tmp11); + c_im(output[ostride]) = tmp9 + tmp12; + c_im(output[2 * ostride]) = tmp12 - tmp9; +} + +/* Descriptor object for the fftw_no_twiddle_3 codelet. config.c declares it extern and takes its address through NOTW_CODELET(x). The initializer supplies, in order: the codelet name string, the function pointer cast to an unprototyped void function pointer, 3 (the size given to the generator), FFTW_FORWARD, FFTW_NOTW, 67, 0 and a null const int pointer. NOTE(review): the field names, and the meaning of 67 and of the trailing 0 and null pointer, are set by the fftw_codelet_desc declaration in fftw-int.h, which is not visible here -- confirm there. */ fftw_codelet_desc fftw_no_twiddle_3_desc = { + "fftw_no_twiddle_3", + (void (*)()) fftw_no_twiddle_3, + 3, + FFTW_FORWARD, + FFTW_NOTW, + 67, + 0, + (const int *) 0, +}; diff --git a/src/fftw/fn_32.c b/src/fftw/fn_32.c new file mode 100644 index 0000000..632b77d --- /dev/null +++ b/src/fftw/fn_32.c @@ -0,0 +1,1049 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Mon Mar 24 02:05:49 EST 2003 */ + +#include "fftw-int.h" +#include "fftw.h" + +/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddle 32 */ + +/* + * This function contains 372 FP additions, 84 FP multiplications, + * (or, 340 additions, 52 multiplications, 32 fused multiply/add), + * 92 stack variables, and 128 memory accesses + */ +/* File-scope constants used as multipliers by fftw_no_twiddle_32. Numerically: K831469612 = cos(3*pi/16), K555570233 = sin(3*pi/16), K195090322 = sin(pi/16), K980785280 = cos(pi/16), K923879532 = cos(pi/8), K382683432 = sin(pi/8), K707106781 = sqrt(2)/2. Each name is K followed by the first nine decimal digits of its value. FFTW_KONST is a macro defined outside this file. */ static const fftw_real K831469612 = +FFTW_KONST(+0.831469612302545237078788377617905756738560812); +static const fftw_real K555570233 = +FFTW_KONST(+0.555570233019602224742830813948532874374937191); +static const fftw_real K195090322 = +FFTW_KONST(+0.195090322016128267848284868477022240927691618); +static const fftw_real K980785280 = +FFTW_KONST(+0.980785280403230449126182236134239036973933731); +static const fftw_real K923879532 = +FFTW_KONST(+0.923879532511286756128183189396788286822416626); +static const fftw_real K382683432 = +FFTW_KONST(+0.382683432365089771728459984030398866761344562); +static const fftw_real K707106781 = +FFTW_KONST(+0.707106781186547524400844362104849039284835938); + +/* + * Generator Id's : + * $Id: fn_32.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fn_32.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fn_32.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + */ + +void fftw_no_twiddle_32(const fftw_complex *input, fftw_complex *output, + int istride, int ostride) +{ + fftw_real tmp7; + fftw_real tmp275; + fftw_real tmp70; + fftw_real tmp309; + fftw_real tmp97; + fftw_real tmp215; + fftw_real tmp179; + fftw_real tmp241; + fftw_real tmp14; + fftw_real tmp310; + fftw_real tmp77; + fftw_real tmp276; + fftw_real tmp182; + fftw_real
tmp216; + fftw_real tmp104; + fftw_real tmp242; + fftw_real tmp153; + fftw_real tmp233; + fftw_real tmp53; + fftw_real tmp60; + fftw_real tmp351; + fftw_real tmp306; + fftw_real tmp330; + fftw_real tmp352; + fftw_real tmp353; + fftw_real tmp354; + fftw_real tmp170; + fftw_real tmp236; + fftw_real tmp301; + fftw_real tmp329; + fftw_real tmp164; + fftw_real tmp237; + fftw_real tmp173; + fftw_real tmp234; + fftw_real tmp22; + fftw_real tmp280; + fftw_real tmp313; + fftw_real tmp85; + fftw_real tmp112; + fftw_real tmp185; + fftw_real tmp220; + fftw_real tmp245; + fftw_real tmp29; + fftw_real tmp283; + fftw_real tmp312; + fftw_real tmp92; + fftw_real tmp119; + fftw_real tmp184; + fftw_real tmp223; + fftw_real tmp244; + fftw_real tmp126; + fftw_real tmp229; + fftw_real tmp38; + fftw_real tmp45; + fftw_real tmp346; + fftw_real tmp295; + fftw_real tmp327; + fftw_real tmp347; + fftw_real tmp348; + fftw_real tmp349; + fftw_real tmp143; + fftw_real tmp226; + fftw_real tmp290; + fftw_real tmp326; + fftw_real tmp137; + fftw_real tmp227; + fftw_real tmp146; + fftw_real tmp230; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp3; + fftw_real tmp95; + fftw_real tmp66; + fftw_real tmp178; + fftw_real tmp6; + fftw_real tmp177; + fftw_real tmp69; + fftw_real tmp96; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp1; + fftw_real tmp2; + fftw_real tmp64; + fftw_real tmp65; + ASSERT_ALIGNED_DOUBLE; + tmp1 = c_re(input[0]); + tmp2 = c_re(input[16 * istride]); + tmp3 = tmp1 + tmp2; + tmp95 = tmp1 - tmp2; + tmp64 = c_im(input[0]); + tmp65 = c_im(input[16 * istride]); + tmp66 = tmp64 + tmp65; + tmp178 = tmp64 - tmp65; + } + { + fftw_real tmp4; + fftw_real tmp5; + fftw_real tmp67; + fftw_real tmp68; + ASSERT_ALIGNED_DOUBLE; + tmp4 = c_re(input[8 * istride]); + tmp5 = c_re(input[24 * istride]); + tmp6 = tmp4 + tmp5; + tmp177 = tmp4 - tmp5; + tmp67 = c_im(input[8 * istride]); + tmp68 = c_im(input[24 * istride]); + tmp69 = tmp67 + tmp68; + tmp96 = tmp67 - tmp68; + } + tmp7 = tmp3 + tmp6; + tmp275 = tmp3 - 
tmp6; + tmp70 = tmp66 + tmp69; + tmp309 = tmp66 - tmp69; + tmp97 = tmp95 - tmp96; + tmp215 = tmp95 + tmp96; + tmp179 = tmp177 + tmp178; + tmp241 = tmp178 - tmp177; + } + { + fftw_real tmp10; + fftw_real tmp99; + fftw_real tmp73; + fftw_real tmp98; + fftw_real tmp13; + fftw_real tmp101; + fftw_real tmp76; + fftw_real tmp102; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp8; + fftw_real tmp9; + fftw_real tmp71; + fftw_real tmp72; + ASSERT_ALIGNED_DOUBLE; + tmp8 = c_re(input[4 * istride]); + tmp9 = c_re(input[20 * istride]); + tmp10 = tmp8 + tmp9; + tmp99 = tmp8 - tmp9; + tmp71 = c_im(input[4 * istride]); + tmp72 = c_im(input[20 * istride]); + tmp73 = tmp71 + tmp72; + tmp98 = tmp71 - tmp72; + } + { + fftw_real tmp11; + fftw_real tmp12; + fftw_real tmp74; + fftw_real tmp75; + ASSERT_ALIGNED_DOUBLE; + tmp11 = c_re(input[28 * istride]); + tmp12 = c_re(input[12 * istride]); + tmp13 = tmp11 + tmp12; + tmp101 = tmp11 - tmp12; + tmp74 = c_im(input[28 * istride]); + tmp75 = c_im(input[12 * istride]); + tmp76 = tmp74 + tmp75; + tmp102 = tmp74 - tmp75; + } + tmp14 = tmp10 + tmp13; + tmp310 = tmp13 - tmp10; + tmp77 = tmp73 + tmp76; + tmp276 = tmp73 - tmp76; + { + fftw_real tmp180; + fftw_real tmp181; + fftw_real tmp100; + fftw_real tmp103; + ASSERT_ALIGNED_DOUBLE; + tmp180 = tmp101 - tmp102; + tmp181 = tmp99 + tmp98; + tmp182 = K707106781 * (tmp180 - tmp181); + tmp216 = K707106781 * (tmp181 + tmp180); + tmp100 = tmp98 - tmp99; + tmp103 = tmp101 + tmp102; + tmp104 = K707106781 * (tmp100 - tmp103); + tmp242 = K707106781 * (tmp100 + tmp103); + } + } + { + fftw_real tmp49; + fftw_real tmp149; + fftw_real tmp169; + fftw_real tmp302; + fftw_real tmp52; + fftw_real tmp166; + fftw_real tmp152; + fftw_real tmp303; + fftw_real tmp56; + fftw_real tmp157; + fftw_real tmp156; + fftw_real tmp298; + fftw_real tmp59; + fftw_real tmp159; + fftw_real tmp162; + fftw_real tmp299; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp47; + fftw_real tmp48; + fftw_real tmp167; + fftw_real tmp168; + 
ASSERT_ALIGNED_DOUBLE; + tmp47 = c_re(input[31 * istride]); + tmp48 = c_re(input[15 * istride]); + tmp49 = tmp47 + tmp48; + tmp149 = tmp47 - tmp48; + tmp167 = c_im(input[31 * istride]); + tmp168 = c_im(input[15 * istride]); + tmp169 = tmp167 - tmp168; + tmp302 = tmp167 + tmp168; + } + { + fftw_real tmp50; + fftw_real tmp51; + fftw_real tmp150; + fftw_real tmp151; + ASSERT_ALIGNED_DOUBLE; + tmp50 = c_re(input[7 * istride]); + tmp51 = c_re(input[23 * istride]); + tmp52 = tmp50 + tmp51; + tmp166 = tmp50 - tmp51; + tmp150 = c_im(input[7 * istride]); + tmp151 = c_im(input[23 * istride]); + tmp152 = tmp150 - tmp151; + tmp303 = tmp150 + tmp151; + } + { + fftw_real tmp54; + fftw_real tmp55; + fftw_real tmp154; + fftw_real tmp155; + ASSERT_ALIGNED_DOUBLE; + tmp54 = c_re(input[3 * istride]); + tmp55 = c_re(input[19 * istride]); + tmp56 = tmp54 + tmp55; + tmp157 = tmp54 - tmp55; + tmp154 = c_im(input[3 * istride]); + tmp155 = c_im(input[19 * istride]); + tmp156 = tmp154 - tmp155; + tmp298 = tmp154 + tmp155; + } + { + fftw_real tmp57; + fftw_real tmp58; + fftw_real tmp160; + fftw_real tmp161; + ASSERT_ALIGNED_DOUBLE; + tmp57 = c_re(input[27 * istride]); + tmp58 = c_re(input[11 * istride]); + tmp59 = tmp57 + tmp58; + tmp159 = tmp57 - tmp58; + tmp160 = c_im(input[27 * istride]); + tmp161 = c_im(input[11 * istride]); + tmp162 = tmp160 - tmp161; + tmp299 = tmp160 + tmp161; + } + { + fftw_real tmp304; + fftw_real tmp305; + fftw_real tmp297; + fftw_real tmp300; + ASSERT_ALIGNED_DOUBLE; + tmp153 = tmp149 - tmp152; + tmp233 = tmp149 + tmp152; + tmp53 = tmp49 + tmp52; + tmp60 = tmp56 + tmp59; + tmp351 = tmp53 - tmp60; + tmp304 = tmp302 - tmp303; + tmp305 = tmp59 - tmp56; + tmp306 = tmp304 - tmp305; + tmp330 = tmp305 + tmp304; + tmp352 = tmp302 + tmp303; + tmp353 = tmp298 + tmp299; + tmp354 = tmp352 - tmp353; + tmp170 = tmp166 + tmp169; + tmp236 = tmp169 - tmp166; + tmp297 = tmp49 - tmp52; + tmp300 = tmp298 - tmp299; + tmp301 = tmp297 - tmp300; + tmp329 = tmp297 + tmp300; + { + 
fftw_real tmp158; + fftw_real tmp163; + fftw_real tmp171; + fftw_real tmp172; + ASSERT_ALIGNED_DOUBLE; + tmp158 = tmp156 - tmp157; + tmp163 = tmp159 + tmp162; + tmp164 = K707106781 * (tmp158 - tmp163); + tmp237 = K707106781 * (tmp158 + tmp163); + tmp171 = tmp159 - tmp162; + tmp172 = tmp157 + tmp156; + tmp173 = K707106781 * (tmp171 - tmp172); + tmp234 = K707106781 * (tmp172 + tmp171); + } + } + } + { + fftw_real tmp18; + fftw_real tmp109; + fftw_real tmp81; + fftw_real tmp107; + fftw_real tmp21; + fftw_real tmp106; + fftw_real tmp84; + fftw_real tmp110; + fftw_real tmp278; + fftw_real tmp279; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp16; + fftw_real tmp17; + fftw_real tmp79; + fftw_real tmp80; + ASSERT_ALIGNED_DOUBLE; + tmp16 = c_re(input[2 * istride]); + tmp17 = c_re(input[18 * istride]); + tmp18 = tmp16 + tmp17; + tmp109 = tmp16 - tmp17; + tmp79 = c_im(input[2 * istride]); + tmp80 = c_im(input[18 * istride]); + tmp81 = tmp79 + tmp80; + tmp107 = tmp79 - tmp80; + } + { + fftw_real tmp19; + fftw_real tmp20; + fftw_real tmp82; + fftw_real tmp83; + ASSERT_ALIGNED_DOUBLE; + tmp19 = c_re(input[10 * istride]); + tmp20 = c_re(input[26 * istride]); + tmp21 = tmp19 + tmp20; + tmp106 = tmp19 - tmp20; + tmp82 = c_im(input[10 * istride]); + tmp83 = c_im(input[26 * istride]); + tmp84 = tmp82 + tmp83; + tmp110 = tmp82 - tmp83; + } + tmp22 = tmp18 + tmp21; + tmp278 = tmp81 - tmp84; + tmp279 = tmp18 - tmp21; + tmp280 = tmp278 - tmp279; + tmp313 = tmp279 + tmp278; + tmp85 = tmp81 + tmp84; + { + fftw_real tmp108; + fftw_real tmp111; + fftw_real tmp218; + fftw_real tmp219; + ASSERT_ALIGNED_DOUBLE; + tmp108 = tmp106 + tmp107; + tmp111 = tmp109 - tmp110; + tmp112 = (K382683432 * tmp108) - (K923879532 * tmp111); + tmp185 = (K923879532 * tmp108) + (K382683432 * tmp111); + tmp218 = tmp107 - tmp106; + tmp219 = tmp109 + tmp110; + tmp220 = (K923879532 * tmp218) - (K382683432 * tmp219); + tmp245 = (K382683432 * tmp218) + (K923879532 * tmp219); + } + } + { + fftw_real tmp25; + fftw_real 
tmp116; + fftw_real tmp88; + fftw_real tmp114; + fftw_real tmp28; + fftw_real tmp113; + fftw_real tmp91; + fftw_real tmp117; + fftw_real tmp281; + fftw_real tmp282; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp23; + fftw_real tmp24; + fftw_real tmp86; + fftw_real tmp87; + ASSERT_ALIGNED_DOUBLE; + tmp23 = c_re(input[30 * istride]); + tmp24 = c_re(input[14 * istride]); + tmp25 = tmp23 + tmp24; + tmp116 = tmp23 - tmp24; + tmp86 = c_im(input[30 * istride]); + tmp87 = c_im(input[14 * istride]); + tmp88 = tmp86 + tmp87; + tmp114 = tmp86 - tmp87; + } + { + fftw_real tmp26; + fftw_real tmp27; + fftw_real tmp89; + fftw_real tmp90; + ASSERT_ALIGNED_DOUBLE; + tmp26 = c_re(input[6 * istride]); + tmp27 = c_re(input[22 * istride]); + tmp28 = tmp26 + tmp27; + tmp113 = tmp26 - tmp27; + tmp89 = c_im(input[6 * istride]); + tmp90 = c_im(input[22 * istride]); + tmp91 = tmp89 + tmp90; + tmp117 = tmp89 - tmp90; + } + tmp29 = tmp25 + tmp28; + tmp281 = tmp25 - tmp28; + tmp282 = tmp88 - tmp91; + tmp283 = tmp281 + tmp282; + tmp312 = tmp281 - tmp282; + tmp92 = tmp88 + tmp91; + { + fftw_real tmp115; + fftw_real tmp118; + fftw_real tmp221; + fftw_real tmp222; + ASSERT_ALIGNED_DOUBLE; + tmp115 = tmp113 + tmp114; + tmp118 = tmp116 - tmp117; + tmp119 = (K382683432 * tmp115) + (K923879532 * tmp118); + tmp184 = (K382683432 * tmp118) - (K923879532 * tmp115); + tmp221 = tmp114 - tmp113; + tmp222 = tmp116 + tmp117; + tmp223 = (K923879532 * tmp221) + (K382683432 * tmp222); + tmp244 = (K923879532 * tmp222) - (K382683432 * tmp221); + } + } + { + fftw_real tmp34; + fftw_real tmp139; + fftw_real tmp125; + fftw_real tmp286; + fftw_real tmp37; + fftw_real tmp122; + fftw_real tmp142; + fftw_real tmp287; + fftw_real tmp41; + fftw_real tmp132; + fftw_real tmp135; + fftw_real tmp292; + fftw_real tmp44; + fftw_real tmp127; + fftw_real tmp130; + fftw_real tmp293; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp32; + fftw_real tmp33; + fftw_real tmp123; + fftw_real tmp124; + ASSERT_ALIGNED_DOUBLE; + tmp32 = 
c_re(input[istride]); + tmp33 = c_re(input[17 * istride]); + tmp34 = tmp32 + tmp33; + tmp139 = tmp32 - tmp33; + tmp123 = c_im(input[istride]); + tmp124 = c_im(input[17 * istride]); + tmp125 = tmp123 - tmp124; + tmp286 = tmp123 + tmp124; + } + { + fftw_real tmp35; + fftw_real tmp36; + fftw_real tmp140; + fftw_real tmp141; + ASSERT_ALIGNED_DOUBLE; + tmp35 = c_re(input[9 * istride]); + tmp36 = c_re(input[25 * istride]); + tmp37 = tmp35 + tmp36; + tmp122 = tmp35 - tmp36; + tmp140 = c_im(input[9 * istride]); + tmp141 = c_im(input[25 * istride]); + tmp142 = tmp140 - tmp141; + tmp287 = tmp140 + tmp141; + } + { + fftw_real tmp39; + fftw_real tmp40; + fftw_real tmp133; + fftw_real tmp134; + ASSERT_ALIGNED_DOUBLE; + tmp39 = c_re(input[5 * istride]); + tmp40 = c_re(input[21 * istride]); + tmp41 = tmp39 + tmp40; + tmp132 = tmp39 - tmp40; + tmp133 = c_im(input[5 * istride]); + tmp134 = c_im(input[21 * istride]); + tmp135 = tmp133 - tmp134; + tmp292 = tmp133 + tmp134; + } + { + fftw_real tmp42; + fftw_real tmp43; + fftw_real tmp128; + fftw_real tmp129; + ASSERT_ALIGNED_DOUBLE; + tmp42 = c_re(input[29 * istride]); + tmp43 = c_re(input[13 * istride]); + tmp44 = tmp42 + tmp43; + tmp127 = tmp42 - tmp43; + tmp128 = c_im(input[29 * istride]); + tmp129 = c_im(input[13 * istride]); + tmp130 = tmp128 - tmp129; + tmp293 = tmp128 + tmp129; + } + { + fftw_real tmp291; + fftw_real tmp294; + fftw_real tmp288; + fftw_real tmp289; + ASSERT_ALIGNED_DOUBLE; + tmp126 = tmp122 + tmp125; + tmp229 = tmp125 - tmp122; + tmp38 = tmp34 + tmp37; + tmp45 = tmp41 + tmp44; + tmp346 = tmp38 - tmp45; + tmp291 = tmp34 - tmp37; + tmp294 = tmp292 - tmp293; + tmp295 = tmp291 - tmp294; + tmp327 = tmp291 + tmp294; + tmp347 = tmp286 + tmp287; + tmp348 = tmp292 + tmp293; + tmp349 = tmp347 - tmp348; + tmp143 = tmp139 - tmp142; + tmp226 = tmp139 + tmp142; + tmp288 = tmp286 - tmp287; + tmp289 = tmp44 - tmp41; + tmp290 = tmp288 - tmp289; + tmp326 = tmp289 + tmp288; + { + fftw_real tmp131; + fftw_real tmp136; + fftw_real 
tmp144; + fftw_real tmp145; + ASSERT_ALIGNED_DOUBLE; + tmp131 = tmp127 - tmp130; + tmp136 = tmp132 + tmp135; + tmp137 = K707106781 * (tmp131 - tmp136); + tmp227 = K707106781 * (tmp136 + tmp131); + tmp144 = tmp135 - tmp132; + tmp145 = tmp127 + tmp130; + tmp146 = K707106781 * (tmp144 - tmp145); + tmp230 = K707106781 * (tmp144 + tmp145); + } + } + } + { + fftw_real tmp285; + fftw_real tmp317; + fftw_real tmp320; + fftw_real tmp322; + fftw_real tmp308; + fftw_real tmp316; + fftw_real tmp315; + fftw_real tmp321; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp277; + fftw_real tmp284; + fftw_real tmp318; + fftw_real tmp319; + ASSERT_ALIGNED_DOUBLE; + tmp277 = tmp275 - tmp276; + tmp284 = K707106781 * (tmp280 - tmp283); + tmp285 = tmp277 + tmp284; + tmp317 = tmp277 - tmp284; + tmp318 = (K382683432 * tmp290) - (K923879532 * tmp295); + tmp319 = (K382683432 * tmp306) + (K923879532 * tmp301); + tmp320 = tmp318 - tmp319; + tmp322 = tmp318 + tmp319; + } + { + fftw_real tmp296; + fftw_real tmp307; + fftw_real tmp311; + fftw_real tmp314; + ASSERT_ALIGNED_DOUBLE; + tmp296 = (K923879532 * tmp290) + (K382683432 * tmp295); + tmp307 = (K382683432 * tmp301) - (K923879532 * tmp306); + tmp308 = tmp296 + tmp307; + tmp316 = tmp307 - tmp296; + tmp311 = tmp309 - tmp310; + tmp314 = K707106781 * (tmp312 - tmp313); + tmp315 = tmp311 - tmp314; + tmp321 = tmp311 + tmp314; + } + c_re(output[22 * ostride]) = tmp285 - tmp308; + c_re(output[6 * ostride]) = tmp285 + tmp308; + c_im(output[30 * ostride]) = tmp315 - tmp316; + c_im(output[14 * ostride]) = tmp315 + tmp316; + c_re(output[30 * ostride]) = tmp317 - tmp320; + c_re(output[14 * ostride]) = tmp317 + tmp320; + c_im(output[22 * ostride]) = tmp321 - tmp322; + c_im(output[6 * ostride]) = tmp321 + tmp322; + } + { + fftw_real tmp325; + fftw_real tmp337; + fftw_real tmp340; + fftw_real tmp342; + fftw_real tmp332; + fftw_real tmp336; + fftw_real tmp335; + fftw_real tmp341; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp323; + fftw_real tmp324; + fftw_real 
tmp338; + fftw_real tmp339; + ASSERT_ALIGNED_DOUBLE; + tmp323 = tmp275 + tmp276; + tmp324 = K707106781 * (tmp313 + tmp312); + tmp325 = tmp323 + tmp324; + tmp337 = tmp323 - tmp324; + tmp338 = (K923879532 * tmp326) - (K382683432 * tmp327); + tmp339 = (K923879532 * tmp330) + (K382683432 * tmp329); + tmp340 = tmp338 - tmp339; + tmp342 = tmp338 + tmp339; + } + { + fftw_real tmp328; + fftw_real tmp331; + fftw_real tmp333; + fftw_real tmp334; + ASSERT_ALIGNED_DOUBLE; + tmp328 = (K382683432 * tmp326) + (K923879532 * tmp327); + tmp331 = (K923879532 * tmp329) - (K382683432 * tmp330); + tmp332 = tmp328 + tmp331; + tmp336 = tmp331 - tmp328; + tmp333 = tmp310 + tmp309; + tmp334 = K707106781 * (tmp280 + tmp283); + tmp335 = tmp333 - tmp334; + tmp341 = tmp333 + tmp334; + } + c_re(output[18 * ostride]) = tmp325 - tmp332; + c_re(output[2 * ostride]) = tmp325 + tmp332; + c_im(output[26 * ostride]) = tmp335 - tmp336; + c_im(output[10 * ostride]) = tmp335 + tmp336; + c_re(output[26 * ostride]) = tmp337 - tmp340; + c_re(output[10 * ostride]) = tmp337 + tmp340; + c_im(output[18 * ostride]) = tmp341 - tmp342; + c_im(output[2 * ostride]) = tmp341 + tmp342; + } + { + fftw_real tmp345; + fftw_real tmp361; + fftw_real tmp364; + fftw_real tmp366; + fftw_real tmp356; + fftw_real tmp360; + fftw_real tmp359; + fftw_real tmp365; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp343; + fftw_real tmp344; + fftw_real tmp362; + fftw_real tmp363; + ASSERT_ALIGNED_DOUBLE; + tmp343 = tmp7 - tmp14; + tmp344 = tmp85 - tmp92; + tmp345 = tmp343 + tmp344; + tmp361 = tmp343 - tmp344; + tmp362 = tmp349 - tmp346; + tmp363 = tmp351 + tmp354; + tmp364 = K707106781 * (tmp362 - tmp363); + tmp366 = K707106781 * (tmp362 + tmp363); + } + { + fftw_real tmp350; + fftw_real tmp355; + fftw_real tmp357; + fftw_real tmp358; + ASSERT_ALIGNED_DOUBLE; + tmp350 = tmp346 + tmp349; + tmp355 = tmp351 - tmp354; + tmp356 = K707106781 * (tmp350 + tmp355); + tmp360 = K707106781 * (tmp355 - tmp350); + tmp357 = tmp70 - tmp77; + tmp358 = tmp29 
- tmp22; + tmp359 = tmp357 - tmp358; + tmp365 = tmp358 + tmp357; + } + c_re(output[20 * ostride]) = tmp345 - tmp356; + c_re(output[4 * ostride]) = tmp345 + tmp356; + c_im(output[28 * ostride]) = tmp359 - tmp360; + c_im(output[12 * ostride]) = tmp359 + tmp360; + c_re(output[28 * ostride]) = tmp361 - tmp364; + c_re(output[12 * ostride]) = tmp361 + tmp364; + c_im(output[20 * ostride]) = tmp365 - tmp366; + c_im(output[4 * ostride]) = tmp365 + tmp366; + } + { + fftw_real tmp31; + fftw_real tmp367; + fftw_real tmp370; + fftw_real tmp372; + fftw_real tmp62; + fftw_real tmp63; + fftw_real tmp94; + fftw_real tmp371; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp15; + fftw_real tmp30; + fftw_real tmp368; + fftw_real tmp369; + ASSERT_ALIGNED_DOUBLE; + tmp15 = tmp7 + tmp14; + tmp30 = tmp22 + tmp29; + tmp31 = tmp15 + tmp30; + tmp367 = tmp15 - tmp30; + tmp368 = tmp347 + tmp348; + tmp369 = tmp352 + tmp353; + tmp370 = tmp368 - tmp369; + tmp372 = tmp368 + tmp369; + } + { + fftw_real tmp46; + fftw_real tmp61; + fftw_real tmp78; + fftw_real tmp93; + ASSERT_ALIGNED_DOUBLE; + tmp46 = tmp38 + tmp45; + tmp61 = tmp53 + tmp60; + tmp62 = tmp46 + tmp61; + tmp63 = tmp61 - tmp46; + tmp78 = tmp70 + tmp77; + tmp93 = tmp85 + tmp92; + tmp94 = tmp78 - tmp93; + tmp371 = tmp78 + tmp93; + } + c_re(output[16 * ostride]) = tmp31 - tmp62; + c_re(output[0]) = tmp31 + tmp62; + c_im(output[8 * ostride]) = tmp63 + tmp94; + c_im(output[24 * ostride]) = tmp94 - tmp63; + c_re(output[24 * ostride]) = tmp367 - tmp370; + c_re(output[8 * ostride]) = tmp367 + tmp370; + c_im(output[16 * ostride]) = tmp371 - tmp372; + c_im(output[0]) = tmp371 + tmp372; + } + { + fftw_real tmp121; + fftw_real tmp189; + fftw_real tmp187; + fftw_real tmp193; + fftw_real tmp148; + fftw_real tmp190; + fftw_real tmp175; + fftw_real tmp191; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp105; + fftw_real tmp120; + fftw_real tmp183; + fftw_real tmp186; + ASSERT_ALIGNED_DOUBLE; + tmp105 = tmp97 - tmp104; + tmp120 = tmp112 - tmp119; + tmp121 = tmp105 
+ tmp120; + tmp189 = tmp105 - tmp120; + tmp183 = tmp179 - tmp182; + tmp186 = tmp184 - tmp185; + tmp187 = tmp183 - tmp186; + tmp193 = tmp183 + tmp186; + } + { + fftw_real tmp138; + fftw_real tmp147; + fftw_real tmp165; + fftw_real tmp174; + ASSERT_ALIGNED_DOUBLE; + tmp138 = tmp126 - tmp137; + tmp147 = tmp143 - tmp146; + tmp148 = (K980785280 * tmp138) + (K195090322 * tmp147); + tmp190 = (K195090322 * tmp138) - (K980785280 * tmp147); + tmp165 = tmp153 - tmp164; + tmp174 = tmp170 - tmp173; + tmp175 = (K195090322 * tmp165) - (K980785280 * tmp174); + tmp191 = (K195090322 * tmp174) + (K980785280 * tmp165); + } + { + fftw_real tmp176; + fftw_real tmp188; + fftw_real tmp192; + fftw_real tmp194; + ASSERT_ALIGNED_DOUBLE; + tmp176 = tmp148 + tmp175; + c_re(output[23 * ostride]) = tmp121 - tmp176; + c_re(output[7 * ostride]) = tmp121 + tmp176; + tmp188 = tmp175 - tmp148; + c_im(output[31 * ostride]) = tmp187 - tmp188; + c_im(output[15 * ostride]) = tmp187 + tmp188; + tmp192 = tmp190 - tmp191; + c_re(output[31 * ostride]) = tmp189 - tmp192; + c_re(output[15 * ostride]) = tmp189 + tmp192; + tmp194 = tmp190 + tmp191; + c_im(output[23 * ostride]) = tmp193 - tmp194; + c_im(output[7 * ostride]) = tmp193 + tmp194; + } + } + { + fftw_real tmp197; + fftw_real tmp209; + fftw_real tmp207; + fftw_real tmp213; + fftw_real tmp200; + fftw_real tmp210; + fftw_real tmp203; + fftw_real tmp211; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp195; + fftw_real tmp196; + fftw_real tmp205; + fftw_real tmp206; + ASSERT_ALIGNED_DOUBLE; + tmp195 = tmp97 + tmp104; + tmp196 = tmp185 + tmp184; + tmp197 = tmp195 + tmp196; + tmp209 = tmp195 - tmp196; + tmp205 = tmp179 + tmp182; + tmp206 = tmp112 + tmp119; + tmp207 = tmp205 - tmp206; + tmp213 = tmp205 + tmp206; + } + { + fftw_real tmp198; + fftw_real tmp199; + fftw_real tmp201; + fftw_real tmp202; + ASSERT_ALIGNED_DOUBLE; + tmp198 = tmp126 + tmp137; + tmp199 = tmp143 + tmp146; + tmp200 = (K555570233 * tmp198) + (K831469612 * tmp199); + tmp210 = (K831469612 * 
tmp198) - (K555570233 * tmp199); + tmp201 = tmp153 + tmp164; + tmp202 = tmp170 + tmp173; + tmp203 = (K831469612 * tmp201) - (K555570233 * tmp202); + tmp211 = (K831469612 * tmp202) + (K555570233 * tmp201); + } + { + fftw_real tmp204; + fftw_real tmp208; + fftw_real tmp212; + fftw_real tmp214; + ASSERT_ALIGNED_DOUBLE; + tmp204 = tmp200 + tmp203; + c_re(output[19 * ostride]) = tmp197 - tmp204; + c_re(output[3 * ostride]) = tmp197 + tmp204; + tmp208 = tmp203 - tmp200; + c_im(output[27 * ostride]) = tmp207 - tmp208; + c_im(output[11 * ostride]) = tmp207 + tmp208; + tmp212 = tmp210 - tmp211; + c_re(output[27 * ostride]) = tmp209 - tmp212; + c_re(output[11 * ostride]) = tmp209 + tmp212; + tmp214 = tmp210 + tmp211; + c_im(output[19 * ostride]) = tmp213 - tmp214; + c_im(output[3 * ostride]) = tmp213 + tmp214; + } + } + { + fftw_real tmp225; + fftw_real tmp249; + fftw_real tmp247; + fftw_real tmp253; + fftw_real tmp232; + fftw_real tmp250; + fftw_real tmp239; + fftw_real tmp251; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp217; + fftw_real tmp224; + fftw_real tmp243; + fftw_real tmp246; + ASSERT_ALIGNED_DOUBLE; + tmp217 = tmp215 - tmp216; + tmp224 = tmp220 - tmp223; + tmp225 = tmp217 + tmp224; + tmp249 = tmp217 - tmp224; + tmp243 = tmp241 - tmp242; + tmp246 = tmp244 - tmp245; + tmp247 = tmp243 - tmp246; + tmp253 = tmp243 + tmp246; + } + { + fftw_real tmp228; + fftw_real tmp231; + fftw_real tmp235; + fftw_real tmp238; + ASSERT_ALIGNED_DOUBLE; + tmp228 = tmp226 - tmp227; + tmp231 = tmp229 - tmp230; + tmp232 = (K555570233 * tmp228) + (K831469612 * tmp231); + tmp250 = (K555570233 * tmp231) - (K831469612 * tmp228); + tmp235 = tmp233 - tmp234; + tmp238 = tmp236 - tmp237; + tmp239 = (K555570233 * tmp235) - (K831469612 * tmp238); + tmp251 = (K831469612 * tmp235) + (K555570233 * tmp238); + } + { + fftw_real tmp240; + fftw_real tmp248; + fftw_real tmp252; + fftw_real tmp254; + ASSERT_ALIGNED_DOUBLE; + tmp240 = tmp232 + tmp239; + c_re(output[21 * ostride]) = tmp225 - tmp240; + 
c_re(output[5 * ostride]) = tmp225 + tmp240; + tmp248 = tmp239 - tmp232; + c_im(output[29 * ostride]) = tmp247 - tmp248; + c_im(output[13 * ostride]) = tmp247 + tmp248; + tmp252 = tmp250 - tmp251; + c_re(output[29 * ostride]) = tmp249 - tmp252; + c_re(output[13 * ostride]) = tmp249 + tmp252; + tmp254 = tmp250 + tmp251; + c_im(output[21 * ostride]) = tmp253 - tmp254; + c_im(output[5 * ostride]) = tmp253 + tmp254; + } + } + { + fftw_real tmp257; + fftw_real tmp269; + fftw_real tmp267; + fftw_real tmp273; + fftw_real tmp260; + fftw_real tmp270; + fftw_real tmp263; + fftw_real tmp271; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp255; + fftw_real tmp256; + fftw_real tmp265; + fftw_real tmp266; + ASSERT_ALIGNED_DOUBLE; + tmp255 = tmp215 + tmp216; + tmp256 = tmp245 + tmp244; + tmp257 = tmp255 + tmp256; + tmp269 = tmp255 - tmp256; + tmp265 = tmp241 + tmp242; + tmp266 = tmp220 + tmp223; + tmp267 = tmp265 - tmp266; + tmp273 = tmp265 + tmp266; + } + { + fftw_real tmp258; + fftw_real tmp259; + fftw_real tmp261; + fftw_real tmp262; + ASSERT_ALIGNED_DOUBLE; + tmp258 = tmp226 + tmp227; + tmp259 = tmp229 + tmp230; + tmp260 = (K980785280 * tmp258) + (K195090322 * tmp259); + tmp270 = (K980785280 * tmp259) - (K195090322 * tmp258); + tmp261 = tmp233 + tmp234; + tmp262 = tmp236 + tmp237; + tmp263 = (K980785280 * tmp261) - (K195090322 * tmp262); + tmp271 = (K195090322 * tmp261) + (K980785280 * tmp262); + } + { + fftw_real tmp264; + fftw_real tmp268; + fftw_real tmp272; + fftw_real tmp274; + ASSERT_ALIGNED_DOUBLE; + tmp264 = tmp260 + tmp263; + c_re(output[17 * ostride]) = tmp257 - tmp264; + c_re(output[ostride]) = tmp257 + tmp264; + tmp268 = tmp263 - tmp260; + c_im(output[25 * ostride]) = tmp267 - tmp268; + c_im(output[9 * ostride]) = tmp267 + tmp268; + tmp272 = tmp270 - tmp271; + c_re(output[25 * ostride]) = tmp269 - tmp272; + c_re(output[9 * ostride]) = tmp269 + tmp272; + tmp274 = tmp270 + tmp271; + c_im(output[17 * ostride]) = tmp273 - tmp274; + c_im(output[ostride]) = tmp273 + tmp274; 
+ } + } +} + /* Descriptor registering fftw_no_twiddle_32: name string, function pointer, size 32, FFTW_FORWARD, FFTW_NOTW. The meaning of the trailing fields (705, 0, null pointer) is fixed by fftw_codelet_desc, which is declared outside this chunk. */ +fftw_codelet_desc fftw_no_twiddle_32_desc = { + "fftw_no_twiddle_32", + (void (*)()) fftw_no_twiddle_32, + 32, + FFTW_FORWARD, + FFTW_NOTW, + 705, + 0, + (const int *) 0, +}; diff --git a/src/fftw/fn_4.c b/src/fftw/fn_4.c new file mode 100644 index 0000000..23bd538 --- /dev/null +++ b/src/fftw/fn_4.c @@ -0,0 +1,102 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Mon Mar 24 02:05:37 EST 2003 */ + +#include "fftw-int.h" +#include "fftw.h" + +/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddle 4 */ + +/* + * This function contains 16 FP additions, 0 FP multiplications, + * (or, 16 additions, 0 multiplications, 0 fused multiply/add), + * 12 stack variables, and 16 memory accesses + */ + +/* + * Generator Id's : + * $Id: fn_4.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fn_4.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fn_4.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + */ + /* fftw_no_twiddle_4: straight-line 4-point forward DFT codelet (no loops, no twiddle-factor argument). It reads input[k times istride] and writes output[k times ostride] for k = 0..3, so both strides count fftw_complex elements. In the notes below reN and imN stand for c_re and c_im of input[N times istride]. All eight input loads are performed before the first store to output. */ +void fftw_no_twiddle_4(const fftw_complex *input, fftw_complex *output, + int istride, int ostride) +{ /* function-scope temporaries carry the pairwise sums and differences from the two load blocks to the stores at the end */ + fftw_real tmp3; + fftw_real tmp11; + fftw_real tmp9;
+ fftw_real tmp15; + fftw_real tmp6; + fftw_real tmp10; + fftw_real tmp14; + fftw_real tmp16; + ASSERT_ALIGNED_DOUBLE; /* not defined in this file; presumably a macro from the included FFTW headers - confirm in fftw-int.h */ + { /* inputs 0 and 2: tmp3 = re0 + re2, tmp11 = re0 - re2, tmp9 = im0 - im2, tmp15 = im0 + im2 */ + fftw_real tmp1; + fftw_real tmp2; + fftw_real tmp7; + fftw_real tmp8; + ASSERT_ALIGNED_DOUBLE; + tmp1 = c_re(input[0]); + tmp2 = c_re(input[2 * istride]); + tmp3 = tmp1 + tmp2; + tmp11 = tmp1 - tmp2; + tmp7 = c_im(input[0]); + tmp8 = c_im(input[2 * istride]); + tmp9 = tmp7 - tmp8; + tmp15 = tmp7 + tmp8; + } + { /* inputs 1 and 3: tmp6 = re1 + re3, tmp10 = re1 - re3, tmp14 = im1 - im3, tmp16 = im1 + im3 */ + fftw_real tmp4; + fftw_real tmp5; + fftw_real tmp12; + fftw_real tmp13; + ASSERT_ALIGNED_DOUBLE; + tmp4 = c_re(input[istride]); + tmp5 = c_re(input[3 * istride]); + tmp6 = tmp4 + tmp5; + tmp10 = tmp4 - tmp5; + tmp12 = c_im(input[istride]); + tmp13 = c_im(input[3 * istride]); + tmp14 = tmp12 - tmp13; + tmp16 = tmp12 + tmp13; + } + c_re(output[2 * ostride]) = tmp3 - tmp6; + c_re(output[0]) = tmp3 + tmp6; /* real parts of outputs 0 and 2: (re0 + re2) plus, respectively minus, (re1 + re3) */ + c_im(output[ostride]) = tmp9 - tmp10; + c_im(output[3 * ostride]) = tmp10 + tmp9; /* imaginary parts of outputs 1 and 3: (im0 - im2) minus, respectively plus, (re1 - re3) */ + c_re(output[3 * ostride]) = tmp11 - tmp14; + c_re(output[ostride]) = tmp11 + tmp14; /* real parts of outputs 1 and 3: (re0 - re2) plus, respectively minus, (im1 - im3) */ + c_im(output[2 * ostride]) = tmp15 - tmp16; + c_im(output[0]) = tmp15 + tmp16; /* imaginary parts of outputs 0 and 2: (im0 + im2) plus, respectively minus, (im1 + im3) */ +} + /* Descriptor registering fftw_no_twiddle_4: name string, function pointer, size 4, FFTW_FORWARD, FFTW_NOTW. The meaning of the trailing fields (89, 0, null pointer) is fixed by fftw_codelet_desc, which is declared outside this chunk. */ +fftw_codelet_desc fftw_no_twiddle_4_desc = { + "fftw_no_twiddle_4", + (void (*)()) fftw_no_twiddle_4, + 4, + FFTW_FORWARD, + FFTW_NOTW, + 89, + 0, + (const int *) 0, +}; diff --git a/src/fftw/fn_5.c b/src/fftw/fn_5.c new file mode 100644 index 0000000..03e5460 --- /dev/null +++ b/src/fftw/fn_5.c @@ -0,0 +1,150 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Mon Mar 24 02:05:37 EST 2003 */ + +#include "fftw-int.h" +#include "fftw.h" + +/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddle 5 */ + +/* + * This function contains 32 FP additions, 12 FP multiplications, + * (or, 26 additions, 6 multiplications, 6 fused multiply/add), + * 16 stack variables, and 20 memory accesses + */ +static const fftw_real K250000000 = +FFTW_KONST(+0.250000000000000000000000000000000000000000000); /* 1/4 */ +static const fftw_real K587785252 = +FFTW_KONST(+0.587785252292473129168705954639072768597652438); /* sin(pi / 5) */ +static const fftw_real K951056516 = +FFTW_KONST(+0.951056516295153572116439333379382143405698634); /* sin(2 pi / 5) */ +static const fftw_real K559016994 = +FFTW_KONST(+0.559016994374947424102293417182819058860154590); /* sqrt(5) / 4 */ + +/* + * Generator Id's : + * $Id: fn_5.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fn_5.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fn_5.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + */ + /* fftw_no_twiddle_5: straight-line 5-point forward DFT codelet (no loops, no twiddle-factor argument). It reads input[k times istride] and writes output[k times ostride] for k = 0..4, so both strides count fftw_complex elements. In the notes below reN and imN stand for c_re and c_im of input[N times istride]. All ten input loads are performed before the first store to output. */ +void fftw_no_twiddle_5(const fftw_complex *input, fftw_complex *output, + int istride, int ostride) +{ + fftw_real tmp1; + fftw_real tmp24; + fftw_real tmp8; + fftw_real tmp9; + fftw_real tmp28; + fftw_real tmp29; + fftw_real tmp14; + fftw_real tmp25; + fftw_real tmp23; + fftw_real tmp17; + ASSERT_ALIGNED_DOUBLE; /* not defined in this file; presumably a macro from the included FFTW headers - confirm in fftw-int.h */ + tmp1 = c_re(input[0]); + tmp24 = c_im(input[0]); /* tmp1 = re0, tmp24 = im0 */ + { /* real parts of inputs 1..4, paired as (1,4) and (2,3): tmp8 is their total, tmp9 the K559016994-scaled difference of the two pair sums, tmp28 and tmp29 the differences within each pair */ + fftw_real tmp2; + fftw_real tmp3; + fftw_real tmp4; + fftw_real tmp5; + fftw_real tmp6; + fftw_real tmp7; + ASSERT_ALIGNED_DOUBLE; + tmp2 = c_re(input[istride]); + tmp3 = c_re(input[4 * istride]); + tmp4 = tmp2 + tmp3; + tmp5 = c_re(input[2 *
istride]); + tmp6 = c_re(input[3 * istride]); + tmp7 = tmp5 + tmp6; + tmp8 = tmp4 + tmp7; + tmp9 = K559016994 * (tmp4 - tmp7); + tmp28 = tmp2 - tmp3; + tmp29 = tmp5 - tmp6; + } + { /* imaginary parts of inputs 1..4, same pairing: tmp25 is their total, tmp23 the K559016994-scaled difference of the two pair sums, tmp14 and tmp17 the differences within each pair */ + fftw_real tmp12; + fftw_real tmp13; + fftw_real tmp21; + fftw_real tmp15; + fftw_real tmp16; + fftw_real tmp22; + ASSERT_ALIGNED_DOUBLE; + tmp12 = c_im(input[istride]); + tmp13 = c_im(input[4 * istride]); + tmp21 = tmp12 + tmp13; + tmp15 = c_im(input[2 * istride]); + tmp16 = c_im(input[3 * istride]); + tmp22 = tmp15 + tmp16; + tmp14 = tmp12 - tmp13; + tmp25 = tmp21 + tmp22; + tmp23 = K559016994 * (tmp21 - tmp22); + tmp17 = tmp15 - tmp16; + } + c_re(output[0]) = tmp1 + tmp8; /* real part of output 0: sum of all five real parts */ + { /* real parts of outputs 1..4: tmp11 and tmp19 come from the real inputs, tmp18 and tmp20 are the contributions of the imaginary-part differences */ + fftw_real tmp18; + fftw_real tmp20; + fftw_real tmp11; + fftw_real tmp19; + fftw_real tmp10; + ASSERT_ALIGNED_DOUBLE; + tmp18 = (K951056516 * tmp14) + (K587785252 * tmp17); + tmp20 = (K951056516 * tmp17) - (K587785252 * tmp14); + tmp10 = tmp1 - (K250000000 * tmp8); + tmp11 = tmp9 + tmp10; + tmp19 = tmp10 - tmp9; + c_re(output[4 * ostride]) = tmp11 - tmp18; + c_re(output[ostride]) = tmp11 + tmp18; + c_re(output[2 * ostride]) = tmp19 - tmp20; + c_re(output[3 * ostride]) = tmp19 + tmp20; + } + c_im(output[0]) = tmp25 + tmp24; /* imaginary part of output 0: sum of all five imaginary parts */ + { /* imaginary parts of outputs 1..4: tmp27 and tmp32 come from the imaginary inputs, tmp30 and tmp31 are the contributions of the real-part differences */ + fftw_real tmp30; + fftw_real tmp31; + fftw_real tmp27; + fftw_real tmp32; + fftw_real tmp26; + ASSERT_ALIGNED_DOUBLE; + tmp30 = (K951056516 * tmp28) + (K587785252 * tmp29); + tmp31 = (K951056516 * tmp29) - (K587785252 * tmp28); + tmp26 = tmp24 - (K250000000 * tmp25); + tmp27 = tmp23 + tmp26; + tmp32 = tmp26 - tmp23; + c_im(output[ostride]) = tmp27 - tmp30; + c_im(output[4 * ostride]) = tmp30 + tmp27; + c_im(output[2 * ostride]) = tmp31 + tmp32; + c_im(output[3 * ostride]) = tmp32 - tmp31; + } +} + /* Descriptor registering fftw_no_twiddle_5: name string, function pointer, size 5, FFTW_FORWARD, FFTW_NOTW. The meaning of the trailing fields (111, 0, null pointer) is fixed by fftw_codelet_desc, which is declared outside this chunk. */ +fftw_codelet_desc fftw_no_twiddle_5_desc = { + "fftw_no_twiddle_5", + (void (*)()) fftw_no_twiddle_5, + 5, + FFTW_FORWARD, + FFTW_NOTW, + 111, + 0, + (const int *) 0, +}; diff --git a/src/fftw/fn_6.c b/src/fftw/fn_6.c new file mode 100644 index 0000000..c034d04 --- /dev/null +++
b/src/fftw/fn_6.c @@ -0,0 +1,159 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Mon Mar 24 02:05:37 EST 2003 */ + +#include "fftw-int.h" +#include "fftw.h" + +/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddle 6 */ + +/* + * This function contains 36 FP additions, 8 FP multiplications, + * (or, 32 additions, 4 multiplications, 4 fused multiply/add), + * 20 stack variables, and 24 memory accesses + */ +static const fftw_real K866025403 = +FFTW_KONST(+0.866025403784438646763723170752936183471402627); /* sqrt(3) / 2 */ +static const fftw_real K500000000 = +FFTW_KONST(+0.500000000000000000000000000000000000000000000); /* 1/2 */ + +/* + * Generator Id's : + * $Id: fn_6.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fn_6.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fn_6.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + */ + /* fftw_no_twiddle_6: straight-line 6-point forward DFT codelet (no loops, no twiddle-factor argument). It reads input[k times istride] and writes output[k times ostride] for k = 0..5, so both strides count fftw_complex elements. In the notes below reN and imN stand for c_re and c_im of input[N times istride]. All twelve input loads are performed before the first store to output. */ +void fftw_no_twiddle_6(const fftw_complex *input, fftw_complex *output, + int istride, int ostride) +{ + fftw_real tmp3; + fftw_real tmp11; + fftw_real tmp26; + fftw_real tmp33; + fftw_real tmp6; + fftw_real tmp12; + fftw_real tmp9; + fftw_real tmp13; +
fftw_real tmp10; + fftw_real tmp14; + fftw_real tmp18; + fftw_real tmp30; + fftw_real tmp21; + fftw_real tmp31; + fftw_real tmp27; + fftw_real tmp34; + ASSERT_ALIGNED_DOUBLE; /* not defined in this file; presumably a macro from the included FFTW headers - confirm in fftw-int.h */ + { /* inputs 0 and 3: tmp3 = re0 - re3, tmp11 = re0 + re3, tmp26 = im0 - im3, tmp33 = im0 + im3 */ + fftw_real tmp1; + fftw_real tmp2; + fftw_real tmp24; + fftw_real tmp25; + ASSERT_ALIGNED_DOUBLE; + tmp1 = c_re(input[0]); + tmp2 = c_re(input[3 * istride]); + tmp3 = tmp1 - tmp2; + tmp11 = tmp1 + tmp2; + tmp24 = c_im(input[0]); + tmp25 = c_im(input[3 * istride]); + tmp26 = tmp24 - tmp25; + tmp33 = tmp24 + tmp25; + } + { /* real parts paired as (2,5) and (4,1): tmp6 and tmp9 are the differences, tmp12 and tmp13 the sums */ + fftw_real tmp4; + fftw_real tmp5; + fftw_real tmp7; + fftw_real tmp8; + ASSERT_ALIGNED_DOUBLE; + tmp4 = c_re(input[2 * istride]); + tmp5 = c_re(input[5 * istride]); + tmp6 = tmp4 - tmp5; + tmp12 = tmp4 + tmp5; + tmp7 = c_re(input[4 * istride]); + tmp8 = c_re(input[istride]); + tmp9 = tmp7 - tmp8; + tmp13 = tmp7 + tmp8; + } + tmp10 = tmp6 + tmp9; + tmp14 = tmp12 + tmp13; + { /* imaginary parts, same pairing: tmp18 and tmp21 are the differences, tmp30 and tmp31 the sums */ + fftw_real tmp16; + fftw_real tmp17; + fftw_real tmp19; + fftw_real tmp20; + ASSERT_ALIGNED_DOUBLE; + tmp16 = c_im(input[2 * istride]); + tmp17 = c_im(input[5 * istride]); + tmp18 = tmp16 - tmp17; + tmp30 = tmp16 + tmp17; + tmp19 = c_im(input[4 * istride]); + tmp20 = c_im(input[istride]); + tmp21 = tmp19 - tmp20; + tmp31 = tmp19 + tmp20; + } + tmp27 = tmp18 + tmp21; + tmp34 = tmp30 + tmp31; + { /* real parts of all six outputs: 3, 5 and 1 are built from the pair differences, 0, 2 and 4 from the pair sums */ + fftw_real tmp15; + fftw_real tmp22; + fftw_real tmp29; + fftw_real tmp32; + ASSERT_ALIGNED_DOUBLE; + c_re(output[3 * ostride]) = tmp3 + tmp10; + tmp15 = tmp3 - (K500000000 * tmp10); + tmp22 = K866025403 * (tmp18 - tmp21); + c_re(output[5 * ostride]) = tmp15 - tmp22; + c_re(output[ostride]) = tmp15 + tmp22; + c_re(output[0]) = tmp11 + tmp14; + tmp29 = tmp11 - (K500000000 * tmp14); + tmp32 = K866025403 * (tmp30 - tmp31); + c_re(output[2 * ostride]) = tmp29 - tmp32; + c_re(output[4 * ostride]) = tmp29 + tmp32; + } + { /* imaginary parts of all six outputs: 3, 1 and 5 are built from the pair differences, 0, 2 and 4 from the pair sums */ + fftw_real tmp23; + fftw_real tmp28; + fftw_real tmp35; + fftw_real tmp36; + ASSERT_ALIGNED_DOUBLE; + c_im(output[3 * ostride]) = tmp26 + tmp27; + tmp23 = K866025403 * (tmp9 - tmp6); + tmp28 =
tmp26 - (K500000000 * tmp27); + c_im(output[ostride]) = tmp23 + tmp28; + c_im(output[5 * ostride]) = tmp28 - tmp23; + c_im(output[0]) = tmp33 + tmp34; + tmp35 = tmp33 - (K500000000 * tmp34); + tmp36 = K866025403 * (tmp13 - tmp12); + c_im(output[2 * ostride]) = tmp35 - tmp36; + c_im(output[4 * ostride]) = tmp36 + tmp35; + } +} + /* Descriptor registering fftw_no_twiddle_6: name string, function pointer, size 6, FFTW_FORWARD, FFTW_NOTW. The meaning of the trailing fields (133, 0, null pointer) is fixed by fftw_codelet_desc, which is declared outside this chunk. */ +fftw_codelet_desc fftw_no_twiddle_6_desc = { + "fftw_no_twiddle_6", + (void (*)()) fftw_no_twiddle_6, + 6, + FFTW_FORWARD, + FFTW_NOTW, + 133, + 0, + (const int *) 0, +}; diff --git a/src/fftw/fn_64.c b/src/fftw/fn_64.c new file mode 100644 index 0000000..08ab4b6 --- /dev/null +++ b/src/fftw/fn_64.c @@ -0,0 +1,2464 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Mon Mar 24 02:05:51 EST 2003 */ + +#include "fftw-int.h" +#include "fftw.h" + +/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddle 64 */ + +/* + * This function contains 912 FP additions, 248 FP multiplications, + * (or, 808 additions, 144 multiplications, 104 fused multiply/add), + * 156 stack variables, and 256 memory accesses + */ +static const fftw_real K555570233 = +FFTW_KONST(+0.555570233019602224742830813948532874374937191); +static const fftw_real K831469612 = +FFTW_KONST(+0.831469612302545237078788377617905756738560812); +static const fftw_real K881921264 = +FFTW_KONST(+0.881921264348355029712756863660388349508442621); +static const fftw_real K471396736 = +FFTW_KONST(+0.471396736825997648556387625905254377657460319); +static const fftw_real K290284677 = +FFTW_KONST(+0.290284677254462367636192375817395274691476278); +static const fftw_real K956940335 = +FFTW_KONST(+0.956940335732208864935797886980269969482849206); +static const fftw_real K980785280 = +FFTW_KONST(+0.980785280403230449126182236134239036973933731); +static const fftw_real K195090322 = +FFTW_KONST(+0.195090322016128267848284868477022240927691618); +static const fftw_real K995184726 = +FFTW_KONST(+0.995184726672196886244836953109479921575474869); +static const fftw_real K098017140 = +FFTW_KONST(+0.098017140329560601994195563888641845861136673); +static const fftw_real K773010453 = +FFTW_KONST(+0.773010453362736960810906609758469800971041293); +static const fftw_real K634393284 = +FFTW_KONST(+0.634393284163645498215171613225493370675687095); +static const fftw_real K923879532 = 
+FFTW_KONST(+0.923879532511286756128183189396788286822416626); +static const fftw_real K382683432 = +FFTW_KONST(+0.382683432365089771728459984030398866761344562); +static const fftw_real K707106781 = +FFTW_KONST(+0.707106781186547524400844362104849039284835938); + +/* + * Generator Id's : + * $Id: fn_64.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fn_64.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fn_64.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + */ + +void fftw_no_twiddle_64(const fftw_complex *input, fftw_complex *output, + int istride, int ostride) +{ + fftw_real tmp193; + fftw_real tmp471; + fftw_real tmp15; + fftw_real tmp815; + fftw_real tmp719; + fftw_real tmp781; + fftw_real tmp142; + fftw_real tmp849; + fftw_real tmp371; + fftw_real tmp537; + fftw_real tmp637; + fftw_real tmp755; + fftw_real tmp200; + fftw_real tmp538; + fftw_real tmp374; + fftw_real tmp472; + fftw_real tmp109; + fftw_real tmp837; + fftw_real tmp693; + fftw_real tmp773; + fftw_real tmp844; + fftw_real tmp892; + fftw_real tmp710; + fftw_real tmp776; + fftw_real tmp329; + fftw_real tmp429; + fftw_real tmp519; + fftw_real tmp593; + fftw_real tmp362; + fftw_real tmp432; + fftw_real tmp530; + fftw_real tmp596; + fftw_real tmp30; + fftw_real tmp850; + fftw_real tmp640; + fftw_real tmp721; + fftw_real tmp157; + fftw_real tmp816; + fftw_real tmp643; + fftw_real tmp720; + fftw_real tmp208; + fftw_real tmp377; + fftw_real tmp476; + fftw_real tmp541; + fftw_real tmp215; + fftw_real tmp376; + fftw_real tmp479; + fftw_real tmp540; + fftw_real tmp124; + fftw_real tmp845; + fftw_real tmp365; + fftw_real tmp430; + fftw_real tmp352; + fftw_real tmp433; + fftw_real tmp840; + fftw_real tmp893; + fftw_real tmp526; + fftw_real tmp597; + fftw_real tmp533; + fftw_real tmp594; + fftw_real tmp704; + fftw_real tmp777; + fftw_real tmp713; + fftw_real tmp774; + fftw_real tmp46; + fftw_real tmp819; + fftw_real tmp648; + fftw_real tmp758; + fftw_real tmp173; + fftw_real tmp818; + fftw_real tmp651; + fftw_real 
tmp759; + fftw_real tmp228; + fftw_real tmp414; + fftw_real tmp484; + fftw_real tmp578; + fftw_real tmp235; + fftw_real tmp415; + fftw_real tmp487; + fftw_real tmp579; + fftw_real tmp78; + fftw_real tmp831; + fftw_real tmp666; + fftw_real tmp769; + fftw_real tmp828; + fftw_real tmp887; + fftw_real tmp683; + fftw_real tmp766; + fftw_real tmp274; + fftw_real tmp425; + fftw_real tmp500; + fftw_real tmp589; + fftw_real tmp307; + fftw_real tmp422; + fftw_real tmp511; + fftw_real tmp586; + fftw_real tmp61; + fftw_real tmp821; + fftw_real tmp655; + fftw_real tmp761; + fftw_real tmp188; + fftw_real tmp822; + fftw_real tmp658; + fftw_real tmp762; + fftw_real tmp247; + fftw_real tmp417; + fftw_real tmp491; + fftw_real tmp581; + fftw_real tmp254; + fftw_real tmp418; + fftw_real tmp494; + fftw_real tmp582; + fftw_real tmp93; + fftw_real tmp829; + fftw_real tmp310; + fftw_real tmp426; + fftw_real tmp297; + fftw_real tmp423; + fftw_real tmp834; + fftw_real tmp888; + fftw_real tmp507; + fftw_real tmp587; + fftw_real tmp514; + fftw_real tmp590; + fftw_real tmp677; + fftw_real tmp767; + fftw_real tmp686; + fftw_real tmp770; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp3; + fftw_real tmp191; + fftw_real tmp130; + fftw_real tmp370; + fftw_real tmp6; + fftw_real tmp369; + fftw_real tmp133; + fftw_real tmp192; + fftw_real tmp10; + fftw_real tmp195; + fftw_real tmp137; + fftw_real tmp194; + fftw_real tmp13; + fftw_real tmp197; + fftw_real tmp140; + fftw_real tmp198; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp1; + fftw_real tmp2; + fftw_real tmp128; + fftw_real tmp129; + ASSERT_ALIGNED_DOUBLE; + tmp1 = c_re(input[0]); + tmp2 = c_re(input[32 * istride]); + tmp3 = tmp1 + tmp2; + tmp191 = tmp1 - tmp2; + tmp128 = c_im(input[0]); + tmp129 = c_im(input[32 * istride]); + tmp130 = tmp128 + tmp129; + tmp370 = tmp128 - tmp129; + } + { + fftw_real tmp4; + fftw_real tmp5; + fftw_real tmp131; + fftw_real tmp132; + ASSERT_ALIGNED_DOUBLE; + tmp4 = c_re(input[16 * istride]); + tmp5 = c_re(input[48 * 
istride]); + tmp6 = tmp4 + tmp5; + tmp369 = tmp4 - tmp5; + tmp131 = c_im(input[16 * istride]); + tmp132 = c_im(input[48 * istride]); + tmp133 = tmp131 + tmp132; + tmp192 = tmp131 - tmp132; + } + { + fftw_real tmp8; + fftw_real tmp9; + fftw_real tmp135; + fftw_real tmp136; + ASSERT_ALIGNED_DOUBLE; + tmp8 = c_re(input[8 * istride]); + tmp9 = c_re(input[40 * istride]); + tmp10 = tmp8 + tmp9; + tmp195 = tmp8 - tmp9; + tmp135 = c_im(input[8 * istride]); + tmp136 = c_im(input[40 * istride]); + tmp137 = tmp135 + tmp136; + tmp194 = tmp135 - tmp136; + } + { + fftw_real tmp11; + fftw_real tmp12; + fftw_real tmp138; + fftw_real tmp139; + ASSERT_ALIGNED_DOUBLE; + tmp11 = c_re(input[56 * istride]); + tmp12 = c_re(input[24 * istride]); + tmp13 = tmp11 + tmp12; + tmp197 = tmp11 - tmp12; + tmp138 = c_im(input[56 * istride]); + tmp139 = c_im(input[24 * istride]); + tmp140 = tmp138 + tmp139; + tmp198 = tmp138 - tmp139; + } + { + fftw_real tmp7; + fftw_real tmp14; + fftw_real tmp635; + fftw_real tmp636; + ASSERT_ALIGNED_DOUBLE; + tmp193 = tmp191 - tmp192; + tmp471 = tmp191 + tmp192; + tmp7 = tmp3 + tmp6; + tmp14 = tmp10 + tmp13; + tmp15 = tmp7 + tmp14; + tmp815 = tmp7 - tmp14; + { + fftw_real tmp717; + fftw_real tmp718; + fftw_real tmp134; + fftw_real tmp141; + ASSERT_ALIGNED_DOUBLE; + tmp717 = tmp130 - tmp133; + tmp718 = tmp13 - tmp10; + tmp719 = tmp717 - tmp718; + tmp781 = tmp718 + tmp717; + tmp134 = tmp130 + tmp133; + tmp141 = tmp137 + tmp140; + tmp142 = tmp134 + tmp141; + tmp849 = tmp134 - tmp141; + } + tmp371 = tmp369 + tmp370; + tmp537 = tmp370 - tmp369; + tmp635 = tmp3 - tmp6; + tmp636 = tmp137 - tmp140; + tmp637 = tmp635 - tmp636; + tmp755 = tmp635 + tmp636; + { + fftw_real tmp196; + fftw_real tmp199; + fftw_real tmp372; + fftw_real tmp373; + ASSERT_ALIGNED_DOUBLE; + tmp196 = tmp194 - tmp195; + tmp199 = tmp197 + tmp198; + tmp200 = K707106781 * (tmp196 - tmp199); + tmp538 = K707106781 * (tmp196 + tmp199); + tmp372 = tmp197 - tmp198; + tmp373 = tmp195 + tmp194; + tmp374 = 
K707106781 * (tmp372 - tmp373); + tmp472 = K707106781 * (tmp373 + tmp372); + } + } + } + { + fftw_real tmp97; + fftw_real tmp313; + fftw_real tmp357; + fftw_real tmp706; + fftw_real tmp100; + fftw_real tmp354; + fftw_real tmp316; + fftw_real tmp707; + fftw_real tmp107; + fftw_real tmp691; + fftw_real tmp327; + fftw_real tmp359; + fftw_real tmp104; + fftw_real tmp690; + fftw_real tmp322; + fftw_real tmp360; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp95; + fftw_real tmp96; + fftw_real tmp314; + fftw_real tmp315; + ASSERT_ALIGNED_DOUBLE; + tmp95 = c_re(input[63 * istride]); + tmp96 = c_re(input[31 * istride]); + tmp97 = tmp95 + tmp96; + tmp313 = tmp95 - tmp96; + { + fftw_real tmp355; + fftw_real tmp356; + fftw_real tmp98; + fftw_real tmp99; + ASSERT_ALIGNED_DOUBLE; + tmp355 = c_im(input[63 * istride]); + tmp356 = c_im(input[31 * istride]); + tmp357 = tmp355 - tmp356; + tmp706 = tmp355 + tmp356; + tmp98 = c_re(input[15 * istride]); + tmp99 = c_re(input[47 * istride]); + tmp100 = tmp98 + tmp99; + tmp354 = tmp98 - tmp99; + } + tmp314 = c_im(input[15 * istride]); + tmp315 = c_im(input[47 * istride]); + tmp316 = tmp314 - tmp315; + tmp707 = tmp314 + tmp315; + { + fftw_real tmp105; + fftw_real tmp106; + fftw_real tmp323; + fftw_real tmp324; + fftw_real tmp325; + fftw_real tmp326; + ASSERT_ALIGNED_DOUBLE; + tmp105 = c_re(input[55 * istride]); + tmp106 = c_re(input[23 * istride]); + tmp323 = tmp105 - tmp106; + tmp324 = c_im(input[55 * istride]); + tmp325 = c_im(input[23 * istride]); + tmp326 = tmp324 - tmp325; + tmp107 = tmp105 + tmp106; + tmp691 = tmp324 + tmp325; + tmp327 = tmp323 + tmp326; + tmp359 = tmp323 - tmp326; + } + { + fftw_real tmp102; + fftw_real tmp103; + fftw_real tmp321; + fftw_real tmp318; + fftw_real tmp319; + fftw_real tmp320; + ASSERT_ALIGNED_DOUBLE; + tmp102 = c_re(input[7 * istride]); + tmp103 = c_re(input[39 * istride]); + tmp321 = tmp102 - tmp103; + tmp318 = c_im(input[7 * istride]); + tmp319 = c_im(input[39 * istride]); + tmp320 = tmp318 - tmp319; + 
tmp104 = tmp102 + tmp103; + tmp690 = tmp318 + tmp319; + tmp322 = tmp320 - tmp321; + tmp360 = tmp321 + tmp320; + } + } + { + fftw_real tmp101; + fftw_real tmp108; + fftw_real tmp689; + fftw_real tmp692; + ASSERT_ALIGNED_DOUBLE; + tmp101 = tmp97 + tmp100; + tmp108 = tmp104 + tmp107; + tmp109 = tmp101 + tmp108; + tmp837 = tmp101 - tmp108; + tmp689 = tmp97 - tmp100; + tmp692 = tmp690 - tmp691; + tmp693 = tmp689 - tmp692; + tmp773 = tmp689 + tmp692; + } + { + fftw_real tmp842; + fftw_real tmp843; + fftw_real tmp708; + fftw_real tmp709; + ASSERT_ALIGNED_DOUBLE; + tmp842 = tmp706 + tmp707; + tmp843 = tmp690 + tmp691; + tmp844 = tmp842 - tmp843; + tmp892 = tmp842 + tmp843; + tmp708 = tmp706 - tmp707; + tmp709 = tmp107 - tmp104; + tmp710 = tmp708 - tmp709; + tmp776 = tmp709 + tmp708; + } + { + fftw_real tmp317; + fftw_real tmp328; + fftw_real tmp517; + fftw_real tmp518; + ASSERT_ALIGNED_DOUBLE; + tmp317 = tmp313 - tmp316; + tmp328 = K707106781 * (tmp322 - tmp327); + tmp329 = tmp317 - tmp328; + tmp429 = tmp317 + tmp328; + tmp517 = tmp313 + tmp316; + tmp518 = K707106781 * (tmp360 + tmp359); + tmp519 = tmp517 - tmp518; + tmp593 = tmp517 + tmp518; + } + { + fftw_real tmp358; + fftw_real tmp361; + fftw_real tmp528; + fftw_real tmp529; + ASSERT_ALIGNED_DOUBLE; + tmp358 = tmp354 + tmp357; + tmp361 = K707106781 * (tmp359 - tmp360); + tmp362 = tmp358 - tmp361; + tmp432 = tmp358 + tmp361; + tmp528 = tmp357 - tmp354; + tmp529 = K707106781 * (tmp322 + tmp327); + tmp530 = tmp528 - tmp529; + tmp596 = tmp528 + tmp529; + } + } + { + fftw_real tmp18; + fftw_real tmp205; + fftw_real tmp145; + fftw_real tmp203; + fftw_real tmp21; + fftw_real tmp202; + fftw_real tmp148; + fftw_real tmp206; + fftw_real tmp25; + fftw_real tmp212; + fftw_real tmp152; + fftw_real tmp210; + fftw_real tmp28; + fftw_real tmp209; + fftw_real tmp155; + fftw_real tmp213; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp16; + fftw_real tmp17; + fftw_real tmp143; + fftw_real tmp144; + ASSERT_ALIGNED_DOUBLE; + tmp16 = 
c_re(input[4 * istride]); + tmp17 = c_re(input[36 * istride]); + tmp18 = tmp16 + tmp17; + tmp205 = tmp16 - tmp17; + tmp143 = c_im(input[4 * istride]); + tmp144 = c_im(input[36 * istride]); + tmp145 = tmp143 + tmp144; + tmp203 = tmp143 - tmp144; + } + { + fftw_real tmp19; + fftw_real tmp20; + fftw_real tmp146; + fftw_real tmp147; + ASSERT_ALIGNED_DOUBLE; + tmp19 = c_re(input[20 * istride]); + tmp20 = c_re(input[52 * istride]); + tmp21 = tmp19 + tmp20; + tmp202 = tmp19 - tmp20; + tmp146 = c_im(input[20 * istride]); + tmp147 = c_im(input[52 * istride]); + tmp148 = tmp146 + tmp147; + tmp206 = tmp146 - tmp147; + } + { + fftw_real tmp23; + fftw_real tmp24; + fftw_real tmp150; + fftw_real tmp151; + ASSERT_ALIGNED_DOUBLE; + tmp23 = c_re(input[60 * istride]); + tmp24 = c_re(input[28 * istride]); + tmp25 = tmp23 + tmp24; + tmp212 = tmp23 - tmp24; + tmp150 = c_im(input[60 * istride]); + tmp151 = c_im(input[28 * istride]); + tmp152 = tmp150 + tmp151; + tmp210 = tmp150 - tmp151; + } + { + fftw_real tmp26; + fftw_real tmp27; + fftw_real tmp153; + fftw_real tmp154; + ASSERT_ALIGNED_DOUBLE; + tmp26 = c_re(input[12 * istride]); + tmp27 = c_re(input[44 * istride]); + tmp28 = tmp26 + tmp27; + tmp209 = tmp26 - tmp27; + tmp153 = c_im(input[12 * istride]); + tmp154 = c_im(input[44 * istride]); + tmp155 = tmp153 + tmp154; + tmp213 = tmp153 - tmp154; + } + { + fftw_real tmp22; + fftw_real tmp29; + fftw_real tmp638; + fftw_real tmp639; + ASSERT_ALIGNED_DOUBLE; + tmp22 = tmp18 + tmp21; + tmp29 = tmp25 + tmp28; + tmp30 = tmp22 + tmp29; + tmp850 = tmp29 - tmp22; + tmp638 = tmp145 - tmp148; + tmp639 = tmp18 - tmp21; + tmp640 = tmp638 - tmp639; + tmp721 = tmp639 + tmp638; + } + { + fftw_real tmp149; + fftw_real tmp156; + fftw_real tmp641; + fftw_real tmp642; + ASSERT_ALIGNED_DOUBLE; + tmp149 = tmp145 + tmp148; + tmp156 = tmp152 + tmp155; + tmp157 = tmp149 + tmp156; + tmp816 = tmp149 - tmp156; + tmp641 = tmp25 - tmp28; + tmp642 = tmp152 - tmp155; + tmp643 = tmp641 + tmp642; + tmp720 = tmp641 - 
tmp642; + } + { + fftw_real tmp204; + fftw_real tmp207; + fftw_real tmp474; + fftw_real tmp475; + ASSERT_ALIGNED_DOUBLE; + tmp204 = tmp202 + tmp203; + tmp207 = tmp205 - tmp206; + tmp208 = (K382683432 * tmp204) - (K923879532 * tmp207); + tmp377 = (K923879532 * tmp204) + (K382683432 * tmp207); + tmp474 = tmp203 - tmp202; + tmp475 = tmp205 + tmp206; + tmp476 = (K923879532 * tmp474) - (K382683432 * tmp475); + tmp541 = (K382683432 * tmp474) + (K923879532 * tmp475); + } + { + fftw_real tmp211; + fftw_real tmp214; + fftw_real tmp477; + fftw_real tmp478; + ASSERT_ALIGNED_DOUBLE; + tmp211 = tmp209 + tmp210; + tmp214 = tmp212 - tmp213; + tmp215 = (K382683432 * tmp211) + (K923879532 * tmp214); + tmp376 = (K382683432 * tmp214) - (K923879532 * tmp211); + tmp477 = tmp210 - tmp209; + tmp478 = tmp212 + tmp213; + tmp479 = (K923879532 * tmp477) + (K382683432 * tmp478); + tmp540 = (K923879532 * tmp478) - (K382683432 * tmp477); + } + } + { + fftw_real tmp112; + fftw_real tmp694; + fftw_real tmp115; + fftw_real tmp695; + fftw_real tmp334; + fftw_real tmp520; + fftw_real tmp339; + fftw_real tmp521; + fftw_real tmp697; + fftw_real tmp696; + fftw_real tmp119; + fftw_real tmp700; + fftw_real tmp122; + fftw_real tmp701; + fftw_real tmp345; + fftw_real tmp523; + fftw_real tmp350; + fftw_real tmp524; + fftw_real tmp702; + fftw_real tmp699; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp335; + fftw_real tmp333; + fftw_real tmp330; + fftw_real tmp338; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp110; + fftw_real tmp111; + fftw_real tmp331; + fftw_real tmp332; + ASSERT_ALIGNED_DOUBLE; + tmp110 = c_re(input[3 * istride]); + tmp111 = c_re(input[35 * istride]); + tmp112 = tmp110 + tmp111; + tmp335 = tmp110 - tmp111; + tmp331 = c_im(input[3 * istride]); + tmp332 = c_im(input[35 * istride]); + tmp333 = tmp331 - tmp332; + tmp694 = tmp331 + tmp332; + } + { + fftw_real tmp113; + fftw_real tmp114; + fftw_real tmp336; + fftw_real tmp337; + ASSERT_ALIGNED_DOUBLE; + tmp113 = c_re(input[19 * istride]); + tmp114 
= c_re(input[51 * istride]); + tmp115 = tmp113 + tmp114; + tmp330 = tmp113 - tmp114; + tmp336 = c_im(input[19 * istride]); + tmp337 = c_im(input[51 * istride]); + tmp338 = tmp336 - tmp337; + tmp695 = tmp336 + tmp337; + } + tmp334 = tmp330 + tmp333; + tmp520 = tmp333 - tmp330; + tmp339 = tmp335 - tmp338; + tmp521 = tmp335 + tmp338; + tmp697 = tmp112 - tmp115; + tmp696 = tmp694 - tmp695; + } + { + fftw_real tmp346; + fftw_real tmp344; + fftw_real tmp341; + fftw_real tmp349; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp117; + fftw_real tmp118; + fftw_real tmp342; + fftw_real tmp343; + ASSERT_ALIGNED_DOUBLE; + tmp117 = c_re(input[59 * istride]); + tmp118 = c_re(input[27 * istride]); + tmp119 = tmp117 + tmp118; + tmp346 = tmp117 - tmp118; + tmp342 = c_im(input[59 * istride]); + tmp343 = c_im(input[27 * istride]); + tmp344 = tmp342 - tmp343; + tmp700 = tmp342 + tmp343; + } + { + fftw_real tmp120; + fftw_real tmp121; + fftw_real tmp347; + fftw_real tmp348; + ASSERT_ALIGNED_DOUBLE; + tmp120 = c_re(input[11 * istride]); + tmp121 = c_re(input[43 * istride]); + tmp122 = tmp120 + tmp121; + tmp341 = tmp120 - tmp121; + tmp347 = c_im(input[11 * istride]); + tmp348 = c_im(input[43 * istride]); + tmp349 = tmp347 - tmp348; + tmp701 = tmp347 + tmp348; + } + tmp345 = tmp341 + tmp344; + tmp523 = tmp344 - tmp341; + tmp350 = tmp346 - tmp349; + tmp524 = tmp346 + tmp349; + tmp702 = tmp700 - tmp701; + tmp699 = tmp119 - tmp122; + } + { + fftw_real tmp116; + fftw_real tmp123; + fftw_real tmp363; + fftw_real tmp364; + ASSERT_ALIGNED_DOUBLE; + tmp116 = tmp112 + tmp115; + tmp123 = tmp119 + tmp122; + tmp124 = tmp116 + tmp123; + tmp845 = tmp123 - tmp116; + tmp363 = (K382683432 * tmp350) - (K923879532 * tmp345); + tmp364 = (K923879532 * tmp334) + (K382683432 * tmp339); + tmp365 = tmp363 - tmp364; + tmp430 = tmp364 + tmp363; + } + { + fftw_real tmp340; + fftw_real tmp351; + fftw_real tmp838; + fftw_real tmp839; + ASSERT_ALIGNED_DOUBLE; + tmp340 = (K382683432 * tmp334) - (K923879532 * tmp339); + 
tmp351 = (K382683432 * tmp345) + (K923879532 * tmp350); + tmp352 = tmp340 - tmp351; + tmp433 = tmp340 + tmp351; + tmp838 = tmp694 + tmp695; + tmp839 = tmp700 + tmp701; + tmp840 = tmp838 - tmp839; + tmp893 = tmp838 + tmp839; + } + { + fftw_real tmp522; + fftw_real tmp525; + fftw_real tmp531; + fftw_real tmp532; + ASSERT_ALIGNED_DOUBLE; + tmp522 = (K923879532 * tmp520) - (K382683432 * tmp521); + tmp525 = (K923879532 * tmp523) + (K382683432 * tmp524); + tmp526 = tmp522 - tmp525; + tmp597 = tmp522 + tmp525; + tmp531 = (K923879532 * tmp524) - (K382683432 * tmp523); + tmp532 = (K382683432 * tmp520) + (K923879532 * tmp521); + tmp533 = tmp531 - tmp532; + tmp594 = tmp532 + tmp531; + } + { + fftw_real tmp698; + fftw_real tmp703; + fftw_real tmp711; + fftw_real tmp712; + ASSERT_ALIGNED_DOUBLE; + tmp698 = tmp696 - tmp697; + tmp703 = tmp699 + tmp702; + tmp704 = K707106781 * (tmp698 - tmp703); + tmp777 = K707106781 * (tmp698 + tmp703); + tmp711 = tmp699 - tmp702; + tmp712 = tmp697 + tmp696; + tmp713 = K707106781 * (tmp711 - tmp712); + tmp774 = K707106781 * (tmp712 + tmp711); + } + } + { + fftw_real tmp34; + fftw_real tmp229; + fftw_real tmp161; + fftw_real tmp219; + fftw_real tmp37; + fftw_real tmp218; + fftw_real tmp164; + fftw_real tmp230; + fftw_real tmp44; + fftw_real tmp233; + fftw_real tmp223; + fftw_real tmp171; + fftw_real tmp41; + fftw_real tmp232; + fftw_real tmp226; + fftw_real tmp168; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp32; + fftw_real tmp33; + fftw_real tmp162; + fftw_real tmp163; + ASSERT_ALIGNED_DOUBLE; + tmp32 = c_re(input[2 * istride]); + tmp33 = c_re(input[34 * istride]); + tmp34 = tmp32 + tmp33; + tmp229 = tmp32 - tmp33; + { + fftw_real tmp159; + fftw_real tmp160; + fftw_real tmp35; + fftw_real tmp36; + ASSERT_ALIGNED_DOUBLE; + tmp159 = c_im(input[2 * istride]); + tmp160 = c_im(input[34 * istride]); + tmp161 = tmp159 + tmp160; + tmp219 = tmp159 - tmp160; + tmp35 = c_re(input[18 * istride]); + tmp36 = c_re(input[50 * istride]); + tmp37 = tmp35 + tmp36; 
+ tmp218 = tmp35 - tmp36; + } + tmp162 = c_im(input[18 * istride]); + tmp163 = c_im(input[50 * istride]); + tmp164 = tmp162 + tmp163; + tmp230 = tmp162 - tmp163; + { + fftw_real tmp42; + fftw_real tmp43; + fftw_real tmp221; + fftw_real tmp169; + fftw_real tmp170; + fftw_real tmp222; + ASSERT_ALIGNED_DOUBLE; + tmp42 = c_re(input[58 * istride]); + tmp43 = c_re(input[26 * istride]); + tmp221 = tmp42 - tmp43; + tmp169 = c_im(input[58 * istride]); + tmp170 = c_im(input[26 * istride]); + tmp222 = tmp169 - tmp170; + tmp44 = tmp42 + tmp43; + tmp233 = tmp221 + tmp222; + tmp223 = tmp221 - tmp222; + tmp171 = tmp169 + tmp170; + } + { + fftw_real tmp39; + fftw_real tmp40; + fftw_real tmp224; + fftw_real tmp166; + fftw_real tmp167; + fftw_real tmp225; + ASSERT_ALIGNED_DOUBLE; + tmp39 = c_re(input[10 * istride]); + tmp40 = c_re(input[42 * istride]); + tmp224 = tmp39 - tmp40; + tmp166 = c_im(input[10 * istride]); + tmp167 = c_im(input[42 * istride]); + tmp225 = tmp166 - tmp167; + tmp41 = tmp39 + tmp40; + tmp232 = tmp225 - tmp224; + tmp226 = tmp224 + tmp225; + tmp168 = tmp166 + tmp167; + } + } + { + fftw_real tmp38; + fftw_real tmp45; + fftw_real tmp646; + fftw_real tmp647; + ASSERT_ALIGNED_DOUBLE; + tmp38 = tmp34 + tmp37; + tmp45 = tmp41 + tmp44; + tmp46 = tmp38 + tmp45; + tmp819 = tmp38 - tmp45; + tmp646 = tmp161 - tmp164; + tmp647 = tmp44 - tmp41; + tmp648 = tmp646 - tmp647; + tmp758 = tmp647 + tmp646; + } + { + fftw_real tmp165; + fftw_real tmp172; + fftw_real tmp649; + fftw_real tmp650; + ASSERT_ALIGNED_DOUBLE; + tmp165 = tmp161 + tmp164; + tmp172 = tmp168 + tmp171; + tmp173 = tmp165 + tmp172; + tmp818 = tmp165 - tmp172; + tmp649 = tmp34 - tmp37; + tmp650 = tmp168 - tmp171; + tmp651 = tmp649 - tmp650; + tmp759 = tmp649 + tmp650; + } + { + fftw_real tmp220; + fftw_real tmp227; + fftw_real tmp482; + fftw_real tmp483; + ASSERT_ALIGNED_DOUBLE; + tmp220 = tmp218 + tmp219; + tmp227 = K707106781 * (tmp223 - tmp226); + tmp228 = tmp220 - tmp227; + tmp414 = tmp220 + tmp227; + tmp482 = 
tmp219 - tmp218; + tmp483 = K707106781 * (tmp232 + tmp233); + tmp484 = tmp482 - tmp483; + tmp578 = tmp482 + tmp483; + } + { + fftw_real tmp231; + fftw_real tmp234; + fftw_real tmp485; + fftw_real tmp486; + ASSERT_ALIGNED_DOUBLE; + tmp231 = tmp229 - tmp230; + tmp234 = K707106781 * (tmp232 - tmp233); + tmp235 = tmp231 - tmp234; + tmp415 = tmp231 + tmp234; + tmp485 = tmp229 + tmp230; + tmp486 = K707106781 * (tmp226 + tmp223); + tmp487 = tmp485 - tmp486; + tmp579 = tmp485 + tmp486; + } + } + { + fftw_real tmp66; + fftw_real tmp299; + fftw_real tmp261; + fftw_real tmp662; + fftw_real tmp69; + fftw_real tmp258; + fftw_real tmp302; + fftw_real tmp663; + fftw_real tmp76; + fftw_real tmp681; + fftw_real tmp267; + fftw_real tmp305; + fftw_real tmp73; + fftw_real tmp680; + fftw_real tmp272; + fftw_real tmp304; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp64; + fftw_real tmp65; + fftw_real tmp300; + fftw_real tmp301; + ASSERT_ALIGNED_DOUBLE; + tmp64 = c_re(input[istride]); + tmp65 = c_re(input[33 * istride]); + tmp66 = tmp64 + tmp65; + tmp299 = tmp64 - tmp65; + { + fftw_real tmp259; + fftw_real tmp260; + fftw_real tmp67; + fftw_real tmp68; + ASSERT_ALIGNED_DOUBLE; + tmp259 = c_im(input[istride]); + tmp260 = c_im(input[33 * istride]); + tmp261 = tmp259 - tmp260; + tmp662 = tmp259 + tmp260; + tmp67 = c_re(input[17 * istride]); + tmp68 = c_re(input[49 * istride]); + tmp69 = tmp67 + tmp68; + tmp258 = tmp67 - tmp68; + } + tmp300 = c_im(input[17 * istride]); + tmp301 = c_im(input[49 * istride]); + tmp302 = tmp300 - tmp301; + tmp663 = tmp300 + tmp301; + { + fftw_real tmp74; + fftw_real tmp75; + fftw_real tmp263; + fftw_real tmp264; + fftw_real tmp265; + fftw_real tmp266; + ASSERT_ALIGNED_DOUBLE; + tmp74 = c_re(input[57 * istride]); + tmp75 = c_re(input[25 * istride]); + tmp263 = tmp74 - tmp75; + tmp264 = c_im(input[57 * istride]); + tmp265 = c_im(input[25 * istride]); + tmp266 = tmp264 - tmp265; + tmp76 = tmp74 + tmp75; + tmp681 = tmp264 + tmp265; + tmp267 = tmp263 - tmp266; + tmp305 
= tmp263 + tmp266; + } + { + fftw_real tmp71; + fftw_real tmp72; + fftw_real tmp268; + fftw_real tmp269; + fftw_real tmp270; + fftw_real tmp271; + ASSERT_ALIGNED_DOUBLE; + tmp71 = c_re(input[9 * istride]); + tmp72 = c_re(input[41 * istride]); + tmp268 = tmp71 - tmp72; + tmp269 = c_im(input[9 * istride]); + tmp270 = c_im(input[41 * istride]); + tmp271 = tmp269 - tmp270; + tmp73 = tmp71 + tmp72; + tmp680 = tmp269 + tmp270; + tmp272 = tmp268 + tmp271; + tmp304 = tmp271 - tmp268; + } + } + { + fftw_real tmp70; + fftw_real tmp77; + fftw_real tmp664; + fftw_real tmp665; + ASSERT_ALIGNED_DOUBLE; + tmp70 = tmp66 + tmp69; + tmp77 = tmp73 + tmp76; + tmp78 = tmp70 + tmp77; + tmp831 = tmp70 - tmp77; + tmp664 = tmp662 - tmp663; + tmp665 = tmp76 - tmp73; + tmp666 = tmp664 - tmp665; + tmp769 = tmp665 + tmp664; + } + { + fftw_real tmp826; + fftw_real tmp827; + fftw_real tmp679; + fftw_real tmp682; + ASSERT_ALIGNED_DOUBLE; + tmp826 = tmp662 + tmp663; + tmp827 = tmp680 + tmp681; + tmp828 = tmp826 - tmp827; + tmp887 = tmp826 + tmp827; + tmp679 = tmp66 - tmp69; + tmp682 = tmp680 - tmp681; + tmp683 = tmp679 - tmp682; + tmp766 = tmp679 + tmp682; + } + { + fftw_real tmp262; + fftw_real tmp273; + fftw_real tmp498; + fftw_real tmp499; + ASSERT_ALIGNED_DOUBLE; + tmp262 = tmp258 + tmp261; + tmp273 = K707106781 * (tmp267 - tmp272); + tmp274 = tmp262 - tmp273; + tmp425 = tmp262 + tmp273; + tmp498 = tmp261 - tmp258; + tmp499 = K707106781 * (tmp304 + tmp305); + tmp500 = tmp498 - tmp499; + tmp589 = tmp498 + tmp499; + } + { + fftw_real tmp303; + fftw_real tmp306; + fftw_real tmp509; + fftw_real tmp510; + ASSERT_ALIGNED_DOUBLE; + tmp303 = tmp299 - tmp302; + tmp306 = K707106781 * (tmp304 - tmp305); + tmp307 = tmp303 - tmp306; + tmp422 = tmp303 + tmp306; + tmp509 = tmp299 + tmp302; + tmp510 = K707106781 * (tmp272 + tmp267); + tmp511 = tmp509 - tmp510; + tmp586 = tmp509 + tmp510; + } + } + { + fftw_real tmp49; + fftw_real tmp248; + fftw_real tmp176; + fftw_real tmp238; + fftw_real tmp52; + fftw_real 
tmp237; + fftw_real tmp179; + fftw_real tmp249; + fftw_real tmp59; + fftw_real tmp252; + fftw_real tmp242; + fftw_real tmp186; + fftw_real tmp56; + fftw_real tmp251; + fftw_real tmp245; + fftw_real tmp183; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp47; + fftw_real tmp48; + fftw_real tmp177; + fftw_real tmp178; + ASSERT_ALIGNED_DOUBLE; + tmp47 = c_re(input[62 * istride]); + tmp48 = c_re(input[30 * istride]); + tmp49 = tmp47 + tmp48; + tmp248 = tmp47 - tmp48; + { + fftw_real tmp174; + fftw_real tmp175; + fftw_real tmp50; + fftw_real tmp51; + ASSERT_ALIGNED_DOUBLE; + tmp174 = c_im(input[62 * istride]); + tmp175 = c_im(input[30 * istride]); + tmp176 = tmp174 + tmp175; + tmp238 = tmp174 - tmp175; + tmp50 = c_re(input[14 * istride]); + tmp51 = c_re(input[46 * istride]); + tmp52 = tmp50 + tmp51; + tmp237 = tmp50 - tmp51; + } + tmp177 = c_im(input[14 * istride]); + tmp178 = c_im(input[46 * istride]); + tmp179 = tmp177 + tmp178; + tmp249 = tmp177 - tmp178; + { + fftw_real tmp57; + fftw_real tmp58; + fftw_real tmp240; + fftw_real tmp184; + fftw_real tmp185; + fftw_real tmp241; + ASSERT_ALIGNED_DOUBLE; + tmp57 = c_re(input[54 * istride]); + tmp58 = c_re(input[22 * istride]); + tmp240 = tmp57 - tmp58; + tmp184 = c_im(input[54 * istride]); + tmp185 = c_im(input[22 * istride]); + tmp241 = tmp184 - tmp185; + tmp59 = tmp57 + tmp58; + tmp252 = tmp240 + tmp241; + tmp242 = tmp240 - tmp241; + tmp186 = tmp184 + tmp185; + } + { + fftw_real tmp54; + fftw_real tmp55; + fftw_real tmp243; + fftw_real tmp181; + fftw_real tmp182; + fftw_real tmp244; + ASSERT_ALIGNED_DOUBLE; + tmp54 = c_re(input[6 * istride]); + tmp55 = c_re(input[38 * istride]); + tmp243 = tmp54 - tmp55; + tmp181 = c_im(input[6 * istride]); + tmp182 = c_im(input[38 * istride]); + tmp244 = tmp181 - tmp182; + tmp56 = tmp54 + tmp55; + tmp251 = tmp244 - tmp243; + tmp245 = tmp243 + tmp244; + tmp183 = tmp181 + tmp182; + } + } + { + fftw_real tmp53; + fftw_real tmp60; + fftw_real tmp653; + fftw_real tmp654; + ASSERT_ALIGNED_DOUBLE; 
+ tmp53 = tmp49 + tmp52; + tmp60 = tmp56 + tmp59; + tmp61 = tmp53 + tmp60; + tmp821 = tmp53 - tmp60; + tmp653 = tmp176 - tmp179; + tmp654 = tmp59 - tmp56; + tmp655 = tmp653 - tmp654; + tmp761 = tmp654 + tmp653; + } + { + fftw_real tmp180; + fftw_real tmp187; + fftw_real tmp656; + fftw_real tmp657; + ASSERT_ALIGNED_DOUBLE; + tmp180 = tmp176 + tmp179; + tmp187 = tmp183 + tmp186; + tmp188 = tmp180 + tmp187; + tmp822 = tmp180 - tmp187; + tmp656 = tmp49 - tmp52; + tmp657 = tmp183 - tmp186; + tmp658 = tmp656 - tmp657; + tmp762 = tmp656 + tmp657; + } + { + fftw_real tmp239; + fftw_real tmp246; + fftw_real tmp489; + fftw_real tmp490; + ASSERT_ALIGNED_DOUBLE; + tmp239 = tmp237 + tmp238; + tmp246 = K707106781 * (tmp242 - tmp245); + tmp247 = tmp239 - tmp246; + tmp417 = tmp239 + tmp246; + tmp489 = tmp248 + tmp249; + tmp490 = K707106781 * (tmp245 + tmp242); + tmp491 = tmp489 - tmp490; + tmp581 = tmp489 + tmp490; + } + { + fftw_real tmp250; + fftw_real tmp253; + fftw_real tmp492; + fftw_real tmp493; + ASSERT_ALIGNED_DOUBLE; + tmp250 = tmp248 - tmp249; + tmp253 = K707106781 * (tmp251 - tmp252); + tmp254 = tmp250 - tmp253; + tmp418 = tmp250 + tmp253; + tmp492 = tmp238 - tmp237; + tmp493 = K707106781 * (tmp251 + tmp252); + tmp494 = tmp492 - tmp493; + tmp582 = tmp492 + tmp493; + } + } + { + fftw_real tmp81; + fftw_real tmp673; + fftw_real tmp84; + fftw_real tmp674; + fftw_real tmp290; + fftw_real tmp504; + fftw_real tmp295; + fftw_real tmp505; + fftw_real tmp675; + fftw_real tmp672; + fftw_real tmp88; + fftw_real tmp668; + fftw_real tmp91; + fftw_real tmp669; + fftw_real tmp279; + fftw_real tmp501; + fftw_real tmp284; + fftw_real tmp502; + fftw_real tmp670; + fftw_real tmp667; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp291; + fftw_real tmp289; + fftw_real tmp286; + fftw_real tmp294; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp79; + fftw_real tmp80; + fftw_real tmp287; + fftw_real tmp288; + ASSERT_ALIGNED_DOUBLE; + tmp79 = c_re(input[5 * istride]); + tmp80 = c_re(input[37 * 
istride]); + tmp81 = tmp79 + tmp80; + tmp291 = tmp79 - tmp80; + tmp287 = c_im(input[5 * istride]); + tmp288 = c_im(input[37 * istride]); + tmp289 = tmp287 - tmp288; + tmp673 = tmp287 + tmp288; + } + { + fftw_real tmp82; + fftw_real tmp83; + fftw_real tmp292; + fftw_real tmp293; + ASSERT_ALIGNED_DOUBLE; + tmp82 = c_re(input[21 * istride]); + tmp83 = c_re(input[53 * istride]); + tmp84 = tmp82 + tmp83; + tmp286 = tmp82 - tmp83; + tmp292 = c_im(input[21 * istride]); + tmp293 = c_im(input[53 * istride]); + tmp294 = tmp292 - tmp293; + tmp674 = tmp292 + tmp293; + } + tmp290 = tmp286 + tmp289; + tmp504 = tmp289 - tmp286; + tmp295 = tmp291 - tmp294; + tmp505 = tmp291 + tmp294; + tmp675 = tmp673 - tmp674; + tmp672 = tmp81 - tmp84; + } + { + fftw_real tmp275; + fftw_real tmp283; + fftw_real tmp280; + fftw_real tmp278; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp86; + fftw_real tmp87; + fftw_real tmp281; + fftw_real tmp282; + ASSERT_ALIGNED_DOUBLE; + tmp86 = c_re(input[61 * istride]); + tmp87 = c_re(input[29 * istride]); + tmp88 = tmp86 + tmp87; + tmp275 = tmp86 - tmp87; + tmp281 = c_im(input[61 * istride]); + tmp282 = c_im(input[29 * istride]); + tmp283 = tmp281 - tmp282; + tmp668 = tmp281 + tmp282; + } + { + fftw_real tmp89; + fftw_real tmp90; + fftw_real tmp276; + fftw_real tmp277; + ASSERT_ALIGNED_DOUBLE; + tmp89 = c_re(input[13 * istride]); + tmp90 = c_re(input[45 * istride]); + tmp91 = tmp89 + tmp90; + tmp280 = tmp89 - tmp90; + tmp276 = c_im(input[13 * istride]); + tmp277 = c_im(input[45 * istride]); + tmp278 = tmp276 - tmp277; + tmp669 = tmp276 + tmp277; + } + tmp279 = tmp275 - tmp278; + tmp501 = tmp275 + tmp278; + tmp284 = tmp280 + tmp283; + tmp502 = tmp283 - tmp280; + tmp670 = tmp668 - tmp669; + tmp667 = tmp88 - tmp91; + } + { + fftw_real tmp85; + fftw_real tmp92; + fftw_real tmp308; + fftw_real tmp309; + ASSERT_ALIGNED_DOUBLE; + tmp85 = tmp81 + tmp84; + tmp92 = tmp88 + tmp91; + tmp93 = tmp85 + tmp92; + tmp829 = tmp92 - tmp85; + tmp308 = (K382683432 * tmp290) - 
(K923879532 * tmp295); + tmp309 = (K382683432 * tmp284) + (K923879532 * tmp279); + tmp310 = tmp308 - tmp309; + tmp426 = tmp308 + tmp309; + } + { + fftw_real tmp285; + fftw_real tmp296; + fftw_real tmp832; + fftw_real tmp833; + ASSERT_ALIGNED_DOUBLE; + tmp285 = (K382683432 * tmp279) - (K923879532 * tmp284); + tmp296 = (K923879532 * tmp290) + (K382683432 * tmp295); + tmp297 = tmp285 - tmp296; + tmp423 = tmp296 + tmp285; + tmp832 = tmp673 + tmp674; + tmp833 = tmp668 + tmp669; + tmp834 = tmp832 - tmp833; + tmp888 = tmp832 + tmp833; + } + { + fftw_real tmp503; + fftw_real tmp506; + fftw_real tmp512; + fftw_real tmp513; + ASSERT_ALIGNED_DOUBLE; + tmp503 = (K923879532 * tmp501) - (K382683432 * tmp502); + tmp506 = (K382683432 * tmp504) + (K923879532 * tmp505); + tmp507 = tmp503 - tmp506; + tmp587 = tmp506 + tmp503; + tmp512 = (K923879532 * tmp504) - (K382683432 * tmp505); + tmp513 = (K923879532 * tmp502) + (K382683432 * tmp501); + tmp514 = tmp512 - tmp513; + tmp590 = tmp512 + tmp513; + } + { + fftw_real tmp671; + fftw_real tmp676; + fftw_real tmp684; + fftw_real tmp685; + ASSERT_ALIGNED_DOUBLE; + tmp671 = tmp667 - tmp670; + tmp676 = tmp672 + tmp675; + tmp677 = K707106781 * (tmp671 - tmp676); + tmp767 = K707106781 * (tmp676 + tmp671); + tmp684 = tmp675 - tmp672; + tmp685 = tmp667 + tmp670; + tmp686 = K707106781 * (tmp684 - tmp685); + tmp770 = K707106781 * (tmp684 + tmp685); + } + } + { + fftw_real tmp63; + fftw_real tmp907; + fftw_real tmp910; + fftw_real tmp912; + fftw_real tmp126; + fftw_real tmp127; + fftw_real tmp190; + fftw_real tmp911; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp31; + fftw_real tmp62; + fftw_real tmp908; + fftw_real tmp909; + ASSERT_ALIGNED_DOUBLE; + tmp31 = tmp15 + tmp30; + tmp62 = tmp46 + tmp61; + tmp63 = tmp31 + tmp62; + tmp907 = tmp31 - tmp62; + tmp908 = tmp887 + tmp888; + tmp909 = tmp892 + tmp893; + tmp910 = tmp908 - tmp909; + tmp912 = tmp908 + tmp909; + } + { + fftw_real tmp94; + fftw_real tmp125; + fftw_real tmp158; + fftw_real tmp189; + 
ASSERT_ALIGNED_DOUBLE; + tmp94 = tmp78 + tmp93; + tmp125 = tmp109 + tmp124; + tmp126 = tmp94 + tmp125; + tmp127 = tmp125 - tmp94; + tmp158 = tmp142 + tmp157; + tmp189 = tmp173 + tmp188; + tmp190 = tmp158 - tmp189; + tmp911 = tmp158 + tmp189; + } + c_re(output[32 * ostride]) = tmp63 - tmp126; + c_re(output[0]) = tmp63 + tmp126; + c_im(output[16 * ostride]) = tmp127 + tmp190; + c_im(output[48 * ostride]) = tmp190 - tmp127; + c_re(output[48 * ostride]) = tmp907 - tmp910; + c_re(output[16 * ostride]) = tmp907 + tmp910; + c_im(output[32 * ostride]) = tmp911 - tmp912; + c_im(output[0]) = tmp911 + tmp912; + } + { + fftw_real tmp885; + fftw_real tmp901; + fftw_real tmp899; + fftw_real tmp905; + fftw_real tmp890; + fftw_real tmp902; + fftw_real tmp895; + fftw_real tmp903; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp883; + fftw_real tmp884; + fftw_real tmp897; + fftw_real tmp898; + ASSERT_ALIGNED_DOUBLE; + tmp883 = tmp15 - tmp30; + tmp884 = tmp173 - tmp188; + tmp885 = tmp883 + tmp884; + tmp901 = tmp883 - tmp884; + tmp897 = tmp142 - tmp157; + tmp898 = tmp61 - tmp46; + tmp899 = tmp897 - tmp898; + tmp905 = tmp898 + tmp897; + } + { + fftw_real tmp886; + fftw_real tmp889; + fftw_real tmp891; + fftw_real tmp894; + ASSERT_ALIGNED_DOUBLE; + tmp886 = tmp78 - tmp93; + tmp889 = tmp887 - tmp888; + tmp890 = tmp886 + tmp889; + tmp902 = tmp889 - tmp886; + tmp891 = tmp109 - tmp124; + tmp894 = tmp892 - tmp893; + tmp895 = tmp891 - tmp894; + tmp903 = tmp891 + tmp894; + } + { + fftw_real tmp896; + fftw_real tmp900; + fftw_real tmp904; + fftw_real tmp906; + ASSERT_ALIGNED_DOUBLE; + tmp896 = K707106781 * (tmp890 + tmp895); + c_re(output[40 * ostride]) = tmp885 - tmp896; + c_re(output[8 * ostride]) = tmp885 + tmp896; + tmp900 = K707106781 * (tmp895 - tmp890); + c_im(output[56 * ostride]) = tmp899 - tmp900; + c_im(output[24 * ostride]) = tmp899 + tmp900; + tmp904 = K707106781 * (tmp902 - tmp903); + c_re(output[56 * ostride]) = tmp901 - tmp904; + c_re(output[24 * ostride]) = tmp901 + tmp904; + 
tmp906 = K707106781 * (tmp902 + tmp903); + c_im(output[40 * ostride]) = tmp905 - tmp906; + c_im(output[8 * ostride]) = tmp905 + tmp906; + } + } + { + fftw_real tmp217; + fftw_real tmp391; + fftw_real tmp396; + fftw_real tmp406; + fftw_real tmp399; + fftw_real tmp407; + fftw_real tmp367; + fftw_real tmp387; + fftw_real tmp312; + fftw_real tmp386; + fftw_real tmp379; + fftw_real tmp401; + fftw_real tmp382; + fftw_real tmp392; + fftw_real tmp256; + fftw_real tmp402; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp201; + fftw_real tmp216; + fftw_real tmp394; + fftw_real tmp395; + ASSERT_ALIGNED_DOUBLE; + tmp201 = tmp193 - tmp200; + tmp216 = tmp208 - tmp215; + tmp217 = tmp201 - tmp216; + tmp391 = tmp201 + tmp216; + tmp394 = tmp274 + tmp297; + tmp395 = tmp307 + tmp310; + tmp396 = (K634393284 * tmp394) + (K773010453 * tmp395); + tmp406 = (K773010453 * tmp394) - (K634393284 * tmp395); + } + { + fftw_real tmp397; + fftw_real tmp398; + fftw_real tmp353; + fftw_real tmp366; + ASSERT_ALIGNED_DOUBLE; + tmp397 = tmp329 + tmp352; + tmp398 = tmp362 + tmp365; + tmp399 = (K773010453 * tmp397) - (K634393284 * tmp398); + tmp407 = (K773010453 * tmp398) + (K634393284 * tmp397); + tmp353 = tmp329 - tmp352; + tmp366 = tmp362 - tmp365; + tmp367 = (K098017140 * tmp353) - (K995184726 * tmp366); + tmp387 = (K098017140 * tmp366) + (K995184726 * tmp353); + } + { + fftw_real tmp298; + fftw_real tmp311; + fftw_real tmp375; + fftw_real tmp378; + ASSERT_ALIGNED_DOUBLE; + tmp298 = tmp274 - tmp297; + tmp311 = tmp307 - tmp310; + tmp312 = (K995184726 * tmp298) + (K098017140 * tmp311); + tmp386 = (K098017140 * tmp298) - (K995184726 * tmp311); + tmp375 = tmp371 - tmp374; + tmp378 = tmp376 - tmp377; + tmp379 = tmp375 - tmp378; + tmp401 = tmp375 + tmp378; + } + { + fftw_real tmp380; + fftw_real tmp381; + fftw_real tmp236; + fftw_real tmp255; + ASSERT_ALIGNED_DOUBLE; + tmp380 = (K195090322 * tmp254) - (K980785280 * tmp247); + tmp381 = (K980785280 * tmp228) + (K195090322 * tmp235); + tmp382 = tmp380 - tmp381; + 
tmp392 = tmp381 + tmp380; + tmp236 = (K195090322 * tmp228) - (K980785280 * tmp235); + tmp255 = (K195090322 * tmp247) + (K980785280 * tmp254); + tmp256 = tmp236 - tmp255; + tmp402 = tmp236 + tmp255; + } + { + fftw_real tmp257; + fftw_real tmp368; + fftw_real tmp383; + fftw_real tmp384; + ASSERT_ALIGNED_DOUBLE; + tmp257 = tmp217 + tmp256; + tmp368 = tmp312 + tmp367; + c_re(output[47 * ostride]) = tmp257 - tmp368; + c_re(output[15 * ostride]) = tmp257 + tmp368; + tmp383 = tmp379 - tmp382; + tmp384 = tmp367 - tmp312; + c_im(output[63 * ostride]) = tmp383 - tmp384; + c_im(output[31 * ostride]) = tmp383 + tmp384; + } + { + fftw_real tmp389; + fftw_real tmp390; + fftw_real tmp385; + fftw_real tmp388; + ASSERT_ALIGNED_DOUBLE; + tmp389 = tmp379 + tmp382; + tmp390 = tmp386 + tmp387; + c_im(output[47 * ostride]) = tmp389 - tmp390; + c_im(output[15 * ostride]) = tmp389 + tmp390; + tmp385 = tmp217 - tmp256; + tmp388 = tmp386 - tmp387; + c_re(output[63 * ostride]) = tmp385 - tmp388; + c_re(output[31 * ostride]) = tmp385 + tmp388; + } + { + fftw_real tmp393; + fftw_real tmp400; + fftw_real tmp403; + fftw_real tmp404; + ASSERT_ALIGNED_DOUBLE; + tmp393 = tmp391 + tmp392; + tmp400 = tmp396 + tmp399; + c_re(output[39 * ostride]) = tmp393 - tmp400; + c_re(output[7 * ostride]) = tmp393 + tmp400; + tmp403 = tmp401 - tmp402; + tmp404 = tmp399 - tmp396; + c_im(output[55 * ostride]) = tmp403 - tmp404; + c_im(output[23 * ostride]) = tmp403 + tmp404; + } + { + fftw_real tmp409; + fftw_real tmp410; + fftw_real tmp405; + fftw_real tmp408; + ASSERT_ALIGNED_DOUBLE; + tmp409 = tmp401 + tmp402; + tmp410 = tmp406 + tmp407; + c_im(output[39 * ostride]) = tmp409 - tmp410; + c_im(output[7 * ostride]) = tmp409 + tmp410; + tmp405 = tmp391 - tmp392; + tmp408 = tmp406 - tmp407; + c_re(output[55 * ostride]) = tmp405 - tmp408; + c_re(output[23 * ostride]) = tmp405 + tmp408; + } + } + { + fftw_real tmp413; + fftw_real tmp451; + fftw_real tmp456; + fftw_real tmp466; + fftw_real tmp459; + fftw_real tmp467; + 
fftw_real tmp435; + fftw_real tmp447; + fftw_real tmp428; + fftw_real tmp446; + fftw_real tmp439; + fftw_real tmp461; + fftw_real tmp442; + fftw_real tmp452; + fftw_real tmp420; + fftw_real tmp462; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp411; + fftw_real tmp412; + fftw_real tmp454; + fftw_real tmp455; + ASSERT_ALIGNED_DOUBLE; + tmp411 = tmp193 + tmp200; + tmp412 = tmp377 + tmp376; + tmp413 = tmp411 - tmp412; + tmp451 = tmp411 + tmp412; + tmp454 = tmp422 + tmp423; + tmp455 = tmp425 + tmp426; + tmp456 = (K956940335 * tmp454) + (K290284677 * tmp455); + tmp466 = (K956940335 * tmp455) - (K290284677 * tmp454); + } + { + fftw_real tmp457; + fftw_real tmp458; + fftw_real tmp431; + fftw_real tmp434; + ASSERT_ALIGNED_DOUBLE; + tmp457 = tmp429 + tmp430; + tmp458 = tmp432 + tmp433; + tmp459 = (K956940335 * tmp457) - (K290284677 * tmp458); + tmp467 = (K290284677 * tmp457) + (K956940335 * tmp458); + tmp431 = tmp429 - tmp430; + tmp434 = tmp432 - tmp433; + tmp435 = (K471396736 * tmp431) - (K881921264 * tmp434); + tmp447 = (K881921264 * tmp431) + (K471396736 * tmp434); + } + { + fftw_real tmp424; + fftw_real tmp427; + fftw_real tmp437; + fftw_real tmp438; + ASSERT_ALIGNED_DOUBLE; + tmp424 = tmp422 - tmp423; + tmp427 = tmp425 - tmp426; + tmp428 = (K471396736 * tmp424) + (K881921264 * tmp427); + tmp446 = (K471396736 * tmp427) - (K881921264 * tmp424); + tmp437 = tmp371 + tmp374; + tmp438 = tmp208 + tmp215; + tmp439 = tmp437 - tmp438; + tmp461 = tmp437 + tmp438; + } + { + fftw_real tmp440; + fftw_real tmp441; + fftw_real tmp416; + fftw_real tmp419; + ASSERT_ALIGNED_DOUBLE; + tmp440 = (K831469612 * tmp418) - (K555570233 * tmp417); + tmp441 = (K555570233 * tmp414) + (K831469612 * tmp415); + tmp442 = tmp440 - tmp441; + tmp452 = tmp441 + tmp440; + tmp416 = (K831469612 * tmp414) - (K555570233 * tmp415); + tmp419 = (K831469612 * tmp417) + (K555570233 * tmp418); + tmp420 = tmp416 - tmp419; + tmp462 = tmp416 + tmp419; + } + { + fftw_real tmp421; + fftw_real tmp436; + fftw_real tmp443; + 
fftw_real tmp444; + ASSERT_ALIGNED_DOUBLE; + tmp421 = tmp413 + tmp420; + tmp436 = tmp428 + tmp435; + c_re(output[43 * ostride]) = tmp421 - tmp436; + c_re(output[11 * ostride]) = tmp421 + tmp436; + tmp443 = tmp439 - tmp442; + tmp444 = tmp435 - tmp428; + c_im(output[59 * ostride]) = tmp443 - tmp444; + c_im(output[27 * ostride]) = tmp443 + tmp444; + } + { + fftw_real tmp449; + fftw_real tmp450; + fftw_real tmp445; + fftw_real tmp448; + ASSERT_ALIGNED_DOUBLE; + tmp449 = tmp439 + tmp442; + tmp450 = tmp446 + tmp447; + c_im(output[43 * ostride]) = tmp449 - tmp450; + c_im(output[11 * ostride]) = tmp449 + tmp450; + tmp445 = tmp413 - tmp420; + tmp448 = tmp446 - tmp447; + c_re(output[59 * ostride]) = tmp445 - tmp448; + c_re(output[27 * ostride]) = tmp445 + tmp448; + } + { + fftw_real tmp453; + fftw_real tmp460; + fftw_real tmp463; + fftw_real tmp464; + ASSERT_ALIGNED_DOUBLE; + tmp453 = tmp451 + tmp452; + tmp460 = tmp456 + tmp459; + c_re(output[35 * ostride]) = tmp453 - tmp460; + c_re(output[3 * ostride]) = tmp453 + tmp460; + tmp463 = tmp461 - tmp462; + tmp464 = tmp459 - tmp456; + c_im(output[51 * ostride]) = tmp463 - tmp464; + c_im(output[19 * ostride]) = tmp463 + tmp464; + } + { + fftw_real tmp469; + fftw_real tmp470; + fftw_real tmp465; + fftw_real tmp468; + ASSERT_ALIGNED_DOUBLE; + tmp469 = tmp461 + tmp462; + tmp470 = tmp466 + tmp467; + c_im(output[35 * ostride]) = tmp469 - tmp470; + c_im(output[3 * ostride]) = tmp469 + tmp470; + tmp465 = tmp451 - tmp452; + tmp468 = tmp466 - tmp467; + c_re(output[51 * ostride]) = tmp465 - tmp468; + c_re(output[19 * ostride]) = tmp465 + tmp468; + } + } + { + fftw_real tmp817; + fftw_real tmp863; + fftw_real tmp824; + fftw_real tmp874; + fftw_real tmp854; + fftw_real tmp864; + fftw_real tmp836; + fftw_real tmp858; + fftw_real tmp851; + fftw_real tmp873; + fftw_real tmp868; + fftw_real tmp878; + fftw_real tmp847; + fftw_real tmp859; + fftw_real tmp871; + fftw_real tmp879; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp820; + fftw_real tmp823; + 
fftw_real tmp866; + fftw_real tmp867; + ASSERT_ALIGNED_DOUBLE; + tmp817 = tmp815 - tmp816; + tmp863 = tmp815 + tmp816; + tmp820 = tmp818 - tmp819; + tmp823 = tmp821 + tmp822; + tmp824 = K707106781 * (tmp820 - tmp823); + tmp874 = K707106781 * (tmp820 + tmp823); + { + fftw_real tmp852; + fftw_real tmp853; + fftw_real tmp830; + fftw_real tmp835; + ASSERT_ALIGNED_DOUBLE; + tmp852 = tmp821 - tmp822; + tmp853 = tmp819 + tmp818; + tmp854 = K707106781 * (tmp852 - tmp853); + tmp864 = K707106781 * (tmp853 + tmp852); + tmp830 = tmp828 - tmp829; + tmp835 = tmp831 - tmp834; + tmp836 = (K923879532 * tmp830) + (K382683432 * tmp835); + tmp858 = (K382683432 * tmp830) - (K923879532 * tmp835); + } + tmp851 = tmp849 - tmp850; + tmp873 = tmp850 + tmp849; + tmp866 = tmp829 + tmp828; + tmp867 = tmp831 + tmp834; + tmp868 = (K382683432 * tmp866) + (K923879532 * tmp867); + tmp878 = (K923879532 * tmp866) - (K382683432 * tmp867); + { + fftw_real tmp841; + fftw_real tmp846; + fftw_real tmp869; + fftw_real tmp870; + ASSERT_ALIGNED_DOUBLE; + tmp841 = tmp837 - tmp840; + tmp846 = tmp844 - tmp845; + tmp847 = (K382683432 * tmp841) - (K923879532 * tmp846); + tmp859 = (K382683432 * tmp846) + (K923879532 * tmp841); + tmp869 = tmp837 + tmp840; + tmp870 = tmp845 + tmp844; + tmp871 = (K923879532 * tmp869) - (K382683432 * tmp870); + tmp879 = (K923879532 * tmp870) + (K382683432 * tmp869); + } + } + { + fftw_real tmp825; + fftw_real tmp848; + fftw_real tmp855; + fftw_real tmp856; + ASSERT_ALIGNED_DOUBLE; + tmp825 = tmp817 + tmp824; + tmp848 = tmp836 + tmp847; + c_re(output[44 * ostride]) = tmp825 - tmp848; + c_re(output[12 * ostride]) = tmp825 + tmp848; + tmp855 = tmp851 - tmp854; + tmp856 = tmp847 - tmp836; + c_im(output[60 * ostride]) = tmp855 - tmp856; + c_im(output[28 * ostride]) = tmp855 + tmp856; + } + { + fftw_real tmp861; + fftw_real tmp862; + fftw_real tmp857; + fftw_real tmp860; + ASSERT_ALIGNED_DOUBLE; + tmp861 = tmp851 + tmp854; + tmp862 = tmp858 + tmp859; + c_im(output[44 * ostride]) = tmp861 - 
tmp862; + c_im(output[12 * ostride]) = tmp861 + tmp862; + tmp857 = tmp817 - tmp824; + tmp860 = tmp858 - tmp859; + c_re(output[60 * ostride]) = tmp857 - tmp860; + c_re(output[28 * ostride]) = tmp857 + tmp860; + } + { + fftw_real tmp865; + fftw_real tmp872; + fftw_real tmp875; + fftw_real tmp876; + ASSERT_ALIGNED_DOUBLE; + tmp865 = tmp863 + tmp864; + tmp872 = tmp868 + tmp871; + c_re(output[36 * ostride]) = tmp865 - tmp872; + c_re(output[4 * ostride]) = tmp865 + tmp872; + tmp875 = tmp873 - tmp874; + tmp876 = tmp871 - tmp868; + c_im(output[52 * ostride]) = tmp875 - tmp876; + c_im(output[20 * ostride]) = tmp875 + tmp876; + } + { + fftw_real tmp881; + fftw_real tmp882; + fftw_real tmp877; + fftw_real tmp880; + ASSERT_ALIGNED_DOUBLE; + tmp881 = tmp873 + tmp874; + tmp882 = tmp878 + tmp879; + c_im(output[36 * ostride]) = tmp881 - tmp882; + c_im(output[4 * ostride]) = tmp881 + tmp882; + tmp877 = tmp863 - tmp864; + tmp880 = tmp878 - tmp879; + c_re(output[52 * ostride]) = tmp877 - tmp880; + c_re(output[20 * ostride]) = tmp877 + tmp880; + } + } + { + fftw_real tmp757; + fftw_real tmp795; + fftw_real tmp800; + fftw_real tmp810; + fftw_real tmp803; + fftw_real tmp811; + fftw_real tmp779; + fftw_real tmp791; + fftw_real tmp783; + fftw_real tmp805; + fftw_real tmp764; + fftw_real tmp806; + fftw_real tmp786; + fftw_real tmp796; + fftw_real tmp772; + fftw_real tmp790; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp756; + fftw_real tmp798; + fftw_real tmp799; + fftw_real tmp782; + fftw_real tmp760; + fftw_real tmp763; + ASSERT_ALIGNED_DOUBLE; + tmp756 = K707106781 * (tmp721 + tmp720); + tmp757 = tmp755 - tmp756; + tmp795 = tmp755 + tmp756; + tmp798 = tmp766 + tmp767; + tmp799 = tmp769 + tmp770; + tmp800 = (K980785280 * tmp798) + (K195090322 * tmp799); + tmp810 = (K980785280 * tmp799) - (K195090322 * tmp798); + { + fftw_real tmp801; + fftw_real tmp802; + fftw_real tmp775; + fftw_real tmp778; + ASSERT_ALIGNED_DOUBLE; + tmp801 = tmp773 + tmp774; + tmp802 = tmp776 + tmp777; + tmp803 = 
(K980785280 * tmp801) - (K195090322 * tmp802); + tmp811 = (K195090322 * tmp801) + (K980785280 * tmp802); + tmp775 = tmp773 - tmp774; + tmp778 = tmp776 - tmp777; + tmp779 = (K555570233 * tmp775) - (K831469612 * tmp778); + tmp791 = (K831469612 * tmp775) + (K555570233 * tmp778); + } + tmp782 = K707106781 * (tmp640 + tmp643); + tmp783 = tmp781 - tmp782; + tmp805 = tmp781 + tmp782; + tmp760 = (K923879532 * tmp758) - (K382683432 * tmp759); + tmp763 = (K923879532 * tmp761) + (K382683432 * tmp762); + tmp764 = tmp760 - tmp763; + tmp806 = tmp760 + tmp763; + { + fftw_real tmp784; + fftw_real tmp785; + fftw_real tmp768; + fftw_real tmp771; + ASSERT_ALIGNED_DOUBLE; + tmp784 = (K923879532 * tmp762) - (K382683432 * tmp761); + tmp785 = (K382683432 * tmp758) + (K923879532 * tmp759); + tmp786 = tmp784 - tmp785; + tmp796 = tmp785 + tmp784; + tmp768 = tmp766 - tmp767; + tmp771 = tmp769 - tmp770; + tmp772 = (K555570233 * tmp768) + (K831469612 * tmp771); + tmp790 = (K555570233 * tmp771) - (K831469612 * tmp768); + } + } + { + fftw_real tmp765; + fftw_real tmp780; + fftw_real tmp787; + fftw_real tmp788; + ASSERT_ALIGNED_DOUBLE; + tmp765 = tmp757 + tmp764; + tmp780 = tmp772 + tmp779; + c_re(output[42 * ostride]) = tmp765 - tmp780; + c_re(output[10 * ostride]) = tmp765 + tmp780; + tmp787 = tmp783 - tmp786; + tmp788 = tmp779 - tmp772; + c_im(output[58 * ostride]) = tmp787 - tmp788; + c_im(output[26 * ostride]) = tmp787 + tmp788; + } + { + fftw_real tmp793; + fftw_real tmp794; + fftw_real tmp789; + fftw_real tmp792; + ASSERT_ALIGNED_DOUBLE; + tmp793 = tmp783 + tmp786; + tmp794 = tmp790 + tmp791; + c_im(output[42 * ostride]) = tmp793 - tmp794; + c_im(output[10 * ostride]) = tmp793 + tmp794; + tmp789 = tmp757 - tmp764; + tmp792 = tmp790 - tmp791; + c_re(output[58 * ostride]) = tmp789 - tmp792; + c_re(output[26 * ostride]) = tmp789 + tmp792; + } + { + fftw_real tmp797; + fftw_real tmp804; + fftw_real tmp807; + fftw_real tmp808; + ASSERT_ALIGNED_DOUBLE; + tmp797 = tmp795 + tmp796; + tmp804 = 
tmp800 + tmp803; + c_re(output[34 * ostride]) = tmp797 - tmp804; + c_re(output[2 * ostride]) = tmp797 + tmp804; + tmp807 = tmp805 - tmp806; + tmp808 = tmp803 - tmp800; + c_im(output[50 * ostride]) = tmp807 - tmp808; + c_im(output[18 * ostride]) = tmp807 + tmp808; + } + { + fftw_real tmp813; + fftw_real tmp814; + fftw_real tmp809; + fftw_real tmp812; + ASSERT_ALIGNED_DOUBLE; + tmp813 = tmp805 + tmp806; + tmp814 = tmp810 + tmp811; + c_im(output[34 * ostride]) = tmp813 - tmp814; + c_im(output[2 * ostride]) = tmp813 + tmp814; + tmp809 = tmp795 - tmp796; + tmp812 = tmp810 - tmp811; + c_re(output[50 * ostride]) = tmp809 - tmp812; + c_re(output[18 * ostride]) = tmp809 + tmp812; + } + } + { + fftw_real tmp645; + fftw_real tmp735; + fftw_real tmp740; + fftw_real tmp750; + fftw_real tmp743; + fftw_real tmp751; + fftw_real tmp715; + fftw_real tmp731; + fftw_real tmp723; + fftw_real tmp745; + fftw_real tmp660; + fftw_real tmp746; + fftw_real tmp726; + fftw_real tmp736; + fftw_real tmp688; + fftw_real tmp730; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp644; + fftw_real tmp738; + fftw_real tmp739; + fftw_real tmp722; + fftw_real tmp652; + fftw_real tmp659; + ASSERT_ALIGNED_DOUBLE; + tmp644 = K707106781 * (tmp640 - tmp643); + tmp645 = tmp637 - tmp644; + tmp735 = tmp637 + tmp644; + tmp738 = tmp666 + tmp677; + tmp739 = tmp683 + tmp686; + tmp740 = (K555570233 * tmp738) + (K831469612 * tmp739); + tmp750 = (K831469612 * tmp738) - (K555570233 * tmp739); + { + fftw_real tmp741; + fftw_real tmp742; + fftw_real tmp705; + fftw_real tmp714; + ASSERT_ALIGNED_DOUBLE; + tmp741 = tmp693 + tmp704; + tmp742 = tmp710 + tmp713; + tmp743 = (K831469612 * tmp741) - (K555570233 * tmp742); + tmp751 = (K831469612 * tmp742) + (K555570233 * tmp741); + tmp705 = tmp693 - tmp704; + tmp714 = tmp710 - tmp713; + tmp715 = (K195090322 * tmp705) - (K980785280 * tmp714); + tmp731 = (K195090322 * tmp714) + (K980785280 * tmp705); + } + tmp722 = K707106781 * (tmp720 - tmp721); + tmp723 = tmp719 - tmp722; + tmp745 = 
tmp719 + tmp722; + tmp652 = (K382683432 * tmp648) - (K923879532 * tmp651); + tmp659 = (K382683432 * tmp655) + (K923879532 * tmp658); + tmp660 = tmp652 - tmp659; + tmp746 = tmp652 + tmp659; + { + fftw_real tmp724; + fftw_real tmp725; + fftw_real tmp678; + fftw_real tmp687; + ASSERT_ALIGNED_DOUBLE; + tmp724 = (K382683432 * tmp658) - (K923879532 * tmp655); + tmp725 = (K923879532 * tmp648) + (K382683432 * tmp651); + tmp726 = tmp724 - tmp725; + tmp736 = tmp725 + tmp724; + tmp678 = tmp666 - tmp677; + tmp687 = tmp683 - tmp686; + tmp688 = (K980785280 * tmp678) + (K195090322 * tmp687); + tmp730 = (K195090322 * tmp678) - (K980785280 * tmp687); + } + } + { + fftw_real tmp661; + fftw_real tmp716; + fftw_real tmp727; + fftw_real tmp728; + ASSERT_ALIGNED_DOUBLE; + tmp661 = tmp645 + tmp660; + tmp716 = tmp688 + tmp715; + c_re(output[46 * ostride]) = tmp661 - tmp716; + c_re(output[14 * ostride]) = tmp661 + tmp716; + tmp727 = tmp723 - tmp726; + tmp728 = tmp715 - tmp688; + c_im(output[62 * ostride]) = tmp727 - tmp728; + c_im(output[30 * ostride]) = tmp727 + tmp728; + } + { + fftw_real tmp733; + fftw_real tmp734; + fftw_real tmp729; + fftw_real tmp732; + ASSERT_ALIGNED_DOUBLE; + tmp733 = tmp723 + tmp726; + tmp734 = tmp730 + tmp731; + c_im(output[46 * ostride]) = tmp733 - tmp734; + c_im(output[14 * ostride]) = tmp733 + tmp734; + tmp729 = tmp645 - tmp660; + tmp732 = tmp730 - tmp731; + c_re(output[62 * ostride]) = tmp729 - tmp732; + c_re(output[30 * ostride]) = tmp729 + tmp732; + } + { + fftw_real tmp737; + fftw_real tmp744; + fftw_real tmp747; + fftw_real tmp748; + ASSERT_ALIGNED_DOUBLE; + tmp737 = tmp735 + tmp736; + tmp744 = tmp740 + tmp743; + c_re(output[38 * ostride]) = tmp737 - tmp744; + c_re(output[6 * ostride]) = tmp737 + tmp744; + tmp747 = tmp745 - tmp746; + tmp748 = tmp743 - tmp740; + c_im(output[54 * ostride]) = tmp747 - tmp748; + c_im(output[22 * ostride]) = tmp747 + tmp748; + } + { + fftw_real tmp753; + fftw_real tmp754; + fftw_real tmp749; + fftw_real tmp752; + 
ASSERT_ALIGNED_DOUBLE; + tmp753 = tmp745 + tmp746; + tmp754 = tmp750 + tmp751; + c_im(output[38 * ostride]) = tmp753 - tmp754; + c_im(output[6 * ostride]) = tmp753 + tmp754; + tmp749 = tmp735 - tmp736; + tmp752 = tmp750 - tmp751; + c_re(output[54 * ostride]) = tmp749 - tmp752; + c_re(output[22 * ostride]) = tmp749 + tmp752; + } + } + { + fftw_real tmp481; + fftw_real tmp555; + fftw_real tmp560; + fftw_real tmp570; + fftw_real tmp563; + fftw_real tmp571; + fftw_real tmp535; + fftw_real tmp551; + fftw_real tmp516; + fftw_real tmp550; + fftw_real tmp543; + fftw_real tmp565; + fftw_real tmp546; + fftw_real tmp556; + fftw_real tmp496; + fftw_real tmp566; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp473; + fftw_real tmp480; + fftw_real tmp558; + fftw_real tmp559; + ASSERT_ALIGNED_DOUBLE; + tmp473 = tmp471 - tmp472; + tmp480 = tmp476 - tmp479; + tmp481 = tmp473 - tmp480; + tmp555 = tmp473 + tmp480; + tmp558 = tmp500 + tmp507; + tmp559 = tmp511 + tmp514; + tmp560 = (K471396736 * tmp558) + (K881921264 * tmp559); + tmp570 = (K881921264 * tmp558) - (K471396736 * tmp559); + } + { + fftw_real tmp561; + fftw_real tmp562; + fftw_real tmp527; + fftw_real tmp534; + ASSERT_ALIGNED_DOUBLE; + tmp561 = tmp519 + tmp526; + tmp562 = tmp530 + tmp533; + tmp563 = (K881921264 * tmp561) - (K471396736 * tmp562); + tmp571 = (K881921264 * tmp562) + (K471396736 * tmp561); + tmp527 = tmp519 - tmp526; + tmp534 = tmp530 - tmp533; + tmp535 = (K290284677 * tmp527) - (K956940335 * tmp534); + tmp551 = (K290284677 * tmp534) + (K956940335 * tmp527); + } + { + fftw_real tmp508; + fftw_real tmp515; + fftw_real tmp539; + fftw_real tmp542; + ASSERT_ALIGNED_DOUBLE; + tmp508 = tmp500 - tmp507; + tmp515 = tmp511 - tmp514; + tmp516 = (K956940335 * tmp508) + (K290284677 * tmp515); + tmp550 = (K290284677 * tmp508) - (K956940335 * tmp515); + tmp539 = tmp537 - tmp538; + tmp542 = tmp540 - tmp541; + tmp543 = tmp539 - tmp542; + tmp565 = tmp539 + tmp542; + } + { + fftw_real tmp544; + fftw_real tmp545; + fftw_real tmp488; + 
fftw_real tmp495; + ASSERT_ALIGNED_DOUBLE; + tmp544 = (K555570233 * tmp491) - (K831469612 * tmp494); + tmp545 = (K555570233 * tmp487) + (K831469612 * tmp484); + tmp546 = tmp544 - tmp545; + tmp556 = tmp545 + tmp544; + tmp488 = (K555570233 * tmp484) - (K831469612 * tmp487); + tmp495 = (K831469612 * tmp491) + (K555570233 * tmp494); + tmp496 = tmp488 - tmp495; + tmp566 = tmp488 + tmp495; + } + { + fftw_real tmp497; + fftw_real tmp536; + fftw_real tmp547; + fftw_real tmp548; + ASSERT_ALIGNED_DOUBLE; + tmp497 = tmp481 + tmp496; + tmp536 = tmp516 + tmp535; + c_re(output[45 * ostride]) = tmp497 - tmp536; + c_re(output[13 * ostride]) = tmp497 + tmp536; + tmp547 = tmp543 - tmp546; + tmp548 = tmp535 - tmp516; + c_im(output[61 * ostride]) = tmp547 - tmp548; + c_im(output[29 * ostride]) = tmp547 + tmp548; + } + { + fftw_real tmp553; + fftw_real tmp554; + fftw_real tmp549; + fftw_real tmp552; + ASSERT_ALIGNED_DOUBLE; + tmp553 = tmp543 + tmp546; + tmp554 = tmp550 + tmp551; + c_im(output[45 * ostride]) = tmp553 - tmp554; + c_im(output[13 * ostride]) = tmp553 + tmp554; + tmp549 = tmp481 - tmp496; + tmp552 = tmp550 - tmp551; + c_re(output[61 * ostride]) = tmp549 - tmp552; + c_re(output[29 * ostride]) = tmp549 + tmp552; + } + { + fftw_real tmp557; + fftw_real tmp564; + fftw_real tmp567; + fftw_real tmp568; + ASSERT_ALIGNED_DOUBLE; + tmp557 = tmp555 + tmp556; + tmp564 = tmp560 + tmp563; + c_re(output[37 * ostride]) = tmp557 - tmp564; + c_re(output[5 * ostride]) = tmp557 + tmp564; + tmp567 = tmp565 - tmp566; + tmp568 = tmp563 - tmp560; + c_im(output[53 * ostride]) = tmp567 - tmp568; + c_im(output[21 * ostride]) = tmp567 + tmp568; + } + { + fftw_real tmp573; + fftw_real tmp574; + fftw_real tmp569; + fftw_real tmp572; + ASSERT_ALIGNED_DOUBLE; + tmp573 = tmp565 + tmp566; + tmp574 = tmp570 + tmp571; + c_im(output[37 * ostride]) = tmp573 - tmp574; + c_im(output[5 * ostride]) = tmp573 + tmp574; + tmp569 = tmp555 - tmp556; + tmp572 = tmp570 - tmp571; + c_re(output[53 * ostride]) = tmp569 - 
tmp572; + c_re(output[21 * ostride]) = tmp569 + tmp572; + } + } + { + fftw_real tmp577; + fftw_real tmp615; + fftw_real tmp620; + fftw_real tmp630; + fftw_real tmp623; + fftw_real tmp631; + fftw_real tmp599; + fftw_real tmp611; + fftw_real tmp592; + fftw_real tmp610; + fftw_real tmp603; + fftw_real tmp625; + fftw_real tmp606; + fftw_real tmp616; + fftw_real tmp584; + fftw_real tmp626; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp575; + fftw_real tmp576; + fftw_real tmp618; + fftw_real tmp619; + ASSERT_ALIGNED_DOUBLE; + tmp575 = tmp471 + tmp472; + tmp576 = tmp541 + tmp540; + tmp577 = tmp575 - tmp576; + tmp615 = tmp575 + tmp576; + tmp618 = tmp586 + tmp587; + tmp619 = tmp589 + tmp590; + tmp620 = (K995184726 * tmp618) + (K098017140 * tmp619); + tmp630 = (K995184726 * tmp619) - (K098017140 * tmp618); + } + { + fftw_real tmp621; + fftw_real tmp622; + fftw_real tmp595; + fftw_real tmp598; + ASSERT_ALIGNED_DOUBLE; + tmp621 = tmp593 + tmp594; + tmp622 = tmp596 + tmp597; + tmp623 = (K995184726 * tmp621) - (K098017140 * tmp622); + tmp631 = (K098017140 * tmp621) + (K995184726 * tmp622); + tmp595 = tmp593 - tmp594; + tmp598 = tmp596 - tmp597; + tmp599 = (K634393284 * tmp595) - (K773010453 * tmp598); + tmp611 = (K773010453 * tmp595) + (K634393284 * tmp598); + } + { + fftw_real tmp588; + fftw_real tmp591; + fftw_real tmp601; + fftw_real tmp602; + ASSERT_ALIGNED_DOUBLE; + tmp588 = tmp586 - tmp587; + tmp591 = tmp589 - tmp590; + tmp592 = (K634393284 * tmp588) + (K773010453 * tmp591); + tmp610 = (K634393284 * tmp591) - (K773010453 * tmp588); + tmp601 = tmp537 + tmp538; + tmp602 = tmp476 + tmp479; + tmp603 = tmp601 - tmp602; + tmp625 = tmp601 + tmp602; + } + { + fftw_real tmp604; + fftw_real tmp605; + fftw_real tmp580; + fftw_real tmp583; + ASSERT_ALIGNED_DOUBLE; + tmp604 = (K980785280 * tmp581) - (K195090322 * tmp582); + tmp605 = (K980785280 * tmp579) + (K195090322 * tmp578); + tmp606 = tmp604 - tmp605; + tmp616 = tmp605 + tmp604; + tmp580 = (K980785280 * tmp578) - (K195090322 * 
tmp579); + tmp583 = (K195090322 * tmp581) + (K980785280 * tmp582); + tmp584 = tmp580 - tmp583; + tmp626 = tmp580 + tmp583; + } + { + fftw_real tmp585; + fftw_real tmp600; + fftw_real tmp607; + fftw_real tmp608; + ASSERT_ALIGNED_DOUBLE; + tmp585 = tmp577 + tmp584; + tmp600 = tmp592 + tmp599; + c_re(output[41 * ostride]) = tmp585 - tmp600; + c_re(output[9 * ostride]) = tmp585 + tmp600; + tmp607 = tmp603 - tmp606; + tmp608 = tmp599 - tmp592; + c_im(output[57 * ostride]) = tmp607 - tmp608; + c_im(output[25 * ostride]) = tmp607 + tmp608; + } + { + fftw_real tmp613; + fftw_real tmp614; + fftw_real tmp609; + fftw_real tmp612; + ASSERT_ALIGNED_DOUBLE; + tmp613 = tmp603 + tmp606; + tmp614 = tmp610 + tmp611; + c_im(output[41 * ostride]) = tmp613 - tmp614; + c_im(output[9 * ostride]) = tmp613 + tmp614; + tmp609 = tmp577 - tmp584; + tmp612 = tmp610 - tmp611; + c_re(output[57 * ostride]) = tmp609 - tmp612; + c_re(output[25 * ostride]) = tmp609 + tmp612; + } + { + fftw_real tmp617; + fftw_real tmp624; + fftw_real tmp627; + fftw_real tmp628; + ASSERT_ALIGNED_DOUBLE; + tmp617 = tmp615 + tmp616; + tmp624 = tmp620 + tmp623; + c_re(output[33 * ostride]) = tmp617 - tmp624; + c_re(output[ostride]) = tmp617 + tmp624; + tmp627 = tmp625 - tmp626; + tmp628 = tmp623 - tmp620; + c_im(output[49 * ostride]) = tmp627 - tmp628; + c_im(output[17 * ostride]) = tmp627 + tmp628; + } + { + fftw_real tmp633; + fftw_real tmp634; + fftw_real tmp629; + fftw_real tmp632; + ASSERT_ALIGNED_DOUBLE; + tmp633 = tmp625 + tmp626; + tmp634 = tmp630 + tmp631; + c_im(output[33 * ostride]) = tmp633 - tmp634; + c_im(output[ostride]) = tmp633 + tmp634; + tmp629 = tmp615 - tmp616; + tmp632 = tmp630 - tmp631; + c_re(output[49 * ostride]) = tmp629 - tmp632; + c_re(output[17 * ostride]) = tmp629 + tmp632; + } + } +} + +/* Descriptor record for the fftw_no_twiddle_64 codelet. In initializer order it holds: the codelet's name string, the codelet function cast to an unprototyped function pointer, the transform size (64), FFTW_FORWARD, FFTW_NOTW, then the values 1409, 0 and a null int pointer. NOTE(review): the last three presumably are the codelet signature id, the twiddle-factor count and the twiddle-order table of fftw_codelet_desc -- confirm against the struct declaration in fftw.h. */ fftw_codelet_desc fftw_no_twiddle_64_desc = { + "fftw_no_twiddle_64", + (void (*)()) fftw_no_twiddle_64, + 64, + FFTW_FORWARD, + FFTW_NOTW, + 1409, + 0, + (const int *) 0, +}; diff --git a/src/fftw/fn_7.c 
b/src/fftw/fn_7.c new file mode 100644 index 0000000..c9acc08 --- /dev/null +++ b/src/fftw/fn_7.c @@ -0,0 +1,200 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Mon Mar 24 02:05:37 EST 2003 */ + +#include "fftw-int.h" +#include "fftw.h" + +/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddle 7 */ + +/* + * This function contains 60 FP additions, 36 FP multiplications, + * (or, 60 additions, 36 multiplications, 0 fused multiply/add), + * 22 stack variables, and 28 memory accesses + */ +static const fftw_real K222520933 = +FFTW_KONST(+0.222520933956314404288902564496794759466355569); +static const fftw_real K900968867 = +FFTW_KONST(+0.900968867902419126236102319507445051165919162); +static const fftw_real K623489801 = +FFTW_KONST(+0.623489801858733530525004884004239810632274731); +static const fftw_real K781831482 = +FFTW_KONST(+0.781831482468029808708444526674057750232334519); +static const fftw_real K433883739 = +FFTW_KONST(+0.433883739117558120475768332848358754609990728); +static const fftw_real K974927912 = 
+FFTW_KONST(+0.974927912181823607018131682993931217232785801); + +/* + * Generator Id's : + * $Id: fn_7.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fn_7.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fn_7.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + */ + +void fftw_no_twiddle_7(const fftw_complex *input, fftw_complex *output, + int istride, int ostride) +{ + fftw_real tmp1; + fftw_real tmp31; + fftw_real tmp4; + fftw_real tmp26; + fftw_real tmp14; + fftw_real tmp33; + fftw_real tmp7; + fftw_real tmp28; + fftw_real tmp20; + fftw_real tmp30; + fftw_real tmp10; + fftw_real tmp27; + fftw_real tmp17; + fftw_real tmp32; + ASSERT_ALIGNED_DOUBLE; + tmp1 = c_re(input[0]); + tmp31 = c_im(input[0]); + { + fftw_real tmp2; + fftw_real tmp3; + fftw_real tmp12; + fftw_real tmp13; + ASSERT_ALIGNED_DOUBLE; + tmp2 = c_re(input[istride]); + tmp3 = c_re(input[6 * istride]); + tmp4 = tmp2 + tmp3; + tmp26 = tmp3 - tmp2; + tmp12 = c_im(input[istride]); + tmp13 = c_im(input[6 * istride]); + tmp14 = tmp12 - tmp13; + tmp33 = tmp12 + tmp13; + } + { + fftw_real tmp5; + fftw_real tmp6; + fftw_real tmp18; + fftw_real tmp19; + ASSERT_ALIGNED_DOUBLE; + tmp5 = c_re(input[2 * istride]); + tmp6 = c_re(input[5 * istride]); + tmp7 = tmp5 + tmp6; + tmp28 = tmp6 - tmp5; + tmp18 = c_im(input[2 * istride]); + tmp19 = c_im(input[5 * istride]); + tmp20 = tmp18 - tmp19; + tmp30 = tmp18 + tmp19; + } + { + fftw_real tmp8; + fftw_real tmp9; + fftw_real tmp15; + fftw_real tmp16; + ASSERT_ALIGNED_DOUBLE; + tmp8 = c_re(input[3 * istride]); + tmp9 = c_re(input[4 * istride]); + tmp10 = tmp8 + tmp9; + tmp27 = tmp9 - tmp8; + tmp15 = c_im(input[3 * istride]); + tmp16 = c_im(input[4 * istride]); + tmp17 = tmp15 - tmp16; + tmp32 = tmp15 + tmp16; + } + { + fftw_real tmp23; + fftw_real tmp22; + fftw_real tmp35; + fftw_real tmp36; + ASSERT_ALIGNED_DOUBLE; + c_re(output[0]) = tmp1 + tmp4 + tmp7 + tmp10; + tmp23 = + (K974927912 * tmp14) - (K433883739 * tmp20) - + (K781831482 * tmp17); + tmp22 = + tmp1 + (K623489801 * 
tmp10) - (K900968867 * tmp7) - + (K222520933 * tmp4); + c_re(output[5 * ostride]) = tmp22 - tmp23; + c_re(output[2 * ostride]) = tmp22 + tmp23; + { + fftw_real tmp25; + fftw_real tmp24; + fftw_real tmp21; + fftw_real tmp11; + ASSERT_ALIGNED_DOUBLE; + tmp25 = + (K781831482 * tmp14) + (K433883739 * tmp17) + + (K974927912 * tmp20); + tmp24 = + tmp1 + (K623489801 * tmp4) - (K900968867 * tmp10) - + (K222520933 * tmp7); + c_re(output[6 * ostride]) = tmp24 - tmp25; + c_re(output[ostride]) = tmp24 + tmp25; + tmp21 = + (K433883739 * tmp14) + (K974927912 * tmp17) - + (K781831482 * tmp20); + tmp11 = + tmp1 + (K623489801 * tmp7) - (K222520933 * tmp10) - + (K900968867 * tmp4); + c_re(output[4 * ostride]) = tmp11 - tmp21; + c_re(output[3 * ostride]) = tmp11 + tmp21; + } + c_im(output[0]) = tmp33 + tmp32 + tmp30 + tmp31; + tmp35 = + (K781831482 * tmp26) + (K974927912 * tmp28) + + (K433883739 * tmp27); + tmp36 = + (K623489801 * tmp33) + tmp31 - (K222520933 * tmp30) - + (K900968867 * tmp32); + c_im(output[ostride]) = tmp35 + tmp36; + c_im(output[6 * ostride]) = tmp36 - tmp35; + { + fftw_real tmp29; + fftw_real tmp34; + fftw_real tmp37; + fftw_real tmp38; + ASSERT_ALIGNED_DOUBLE; + tmp29 = + (K433883739 * tmp26) + (K974927912 * tmp27) - + (K781831482 * tmp28); + tmp34 = + (K623489801 * tmp30) + tmp31 - (K222520933 * tmp32) - + (K900968867 * tmp33); + c_im(output[3 * ostride]) = tmp29 + tmp34; + c_im(output[4 * ostride]) = tmp34 - tmp29; + tmp37 = + (K974927912 * tmp26) - (K781831482 * tmp27) - + (K433883739 * tmp28); + tmp38 = + (K623489801 * tmp32) + tmp31 - (K900968867 * tmp30) - + (K222520933 * tmp33); + c_im(output[2 * ostride]) = tmp37 + tmp38; + c_im(output[5 * ostride]) = tmp38 - tmp37; + } + } +} + +fftw_codelet_desc fftw_no_twiddle_7_desc = { + "fftw_no_twiddle_7", + (void (*)()) fftw_no_twiddle_7, + 7, + FFTW_FORWARD, + FFTW_NOTW, + 155, + 0, + (const int *) 0, +}; diff --git a/src/fftw/fn_8.c b/src/fftw/fn_8.c new file mode 100644 index 0000000..d2075fb --- /dev/null +++ 
b/src/fftw/fn_8.c @@ -0,0 +1,202 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Mon Mar 24 02:05:37 EST 2003 */ + +#include "fftw-int.h" +#include "fftw.h" + +/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddle 8 */ + +/* + * This function contains 52 FP additions, 4 FP multiplications, + * (or, 52 additions, 4 multiplications, 0 fused multiply/add), + * 26 stack variables, and 32 memory accesses + */ +static const fftw_real K707106781 = +FFTW_KONST(+0.707106781186547524400844362104849039284835938); + +/* + * Generator Id's : + * $Id: fn_8.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fn_8.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fn_8.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + */ +/* fftw_no_twiddle_8: straight-line size-8 codelet (genfft -notwiddle 8; tagged FFTW_FORWARD / FFTW_NOTW in the descriptor below). It loads c_re and c_im of input[k * istride] for k = 0..7, all before the first store, combines them with additions, subtractions and four multiplications by K707106781 (numerically 1/sqrt(2)), and stores c_re and c_im of output[k * ostride] for k = 0..7. NOTE(review): whether input and output may alias is not stated in this file; confirm against the callers. */ +void fftw_no_twiddle_8(const fftw_complex *input, fftw_complex *output, + int istride, int ostride) +{ + fftw_real tmp3; + fftw_real tmp23; + fftw_real tmp18; + fftw_real tmp38; + fftw_real tmp6; + fftw_real tmp37; + fftw_real tmp21; + fftw_real tmp24; + fftw_real tmp13; + fftw_real tmp49; + fftw_real tmp35; + fftw_real tmp43; + fftw_real tmp10; + 
fftw_real tmp48; + fftw_real tmp30; + fftw_real tmp42; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp1; + fftw_real tmp2; + fftw_real tmp19; + fftw_real tmp20; + ASSERT_ALIGNED_DOUBLE; + tmp1 = c_re(input[0]); + tmp2 = c_re(input[4 * istride]); + tmp3 = tmp1 + tmp2; + tmp23 = tmp1 - tmp2; + { + fftw_real tmp16; + fftw_real tmp17; + fftw_real tmp4; + fftw_real tmp5; + ASSERT_ALIGNED_DOUBLE; + tmp16 = c_im(input[0]); + tmp17 = c_im(input[4 * istride]); + tmp18 = tmp16 + tmp17; + tmp38 = tmp16 - tmp17; + tmp4 = c_re(input[2 * istride]); + tmp5 = c_re(input[6 * istride]); + tmp6 = tmp4 + tmp5; + tmp37 = tmp4 - tmp5; + } + tmp19 = c_im(input[2 * istride]); + tmp20 = c_im(input[6 * istride]); + tmp21 = tmp19 + tmp20; + tmp24 = tmp19 - tmp20; + { + fftw_real tmp11; + fftw_real tmp12; + fftw_real tmp31; + fftw_real tmp32; + fftw_real tmp33; + fftw_real tmp34; + ASSERT_ALIGNED_DOUBLE; + tmp11 = c_re(input[7 * istride]); + tmp12 = c_re(input[3 * istride]); + tmp31 = tmp11 - tmp12; + tmp32 = c_im(input[7 * istride]); + tmp33 = c_im(input[3 * istride]); + tmp34 = tmp32 - tmp33; + tmp13 = tmp11 + tmp12; + tmp49 = tmp32 + tmp33; + tmp35 = tmp31 - tmp34; + tmp43 = tmp31 + tmp34; + } + { + fftw_real tmp8; + fftw_real tmp9; + fftw_real tmp26; + fftw_real tmp27; + fftw_real tmp28; + fftw_real tmp29; + ASSERT_ALIGNED_DOUBLE; + tmp8 = c_re(input[istride]); + tmp9 = c_re(input[5 * istride]); + tmp26 = tmp8 - tmp9; + tmp27 = c_im(input[istride]); + tmp28 = c_im(input[5 * istride]); + tmp29 = tmp27 - tmp28; + tmp10 = tmp8 + tmp9; + tmp48 = tmp27 + tmp28; + tmp30 = tmp26 + tmp29; + tmp42 = tmp29 - tmp26; + } + } + { + fftw_real tmp7; + fftw_real tmp14; + fftw_real tmp15; + fftw_real tmp22; + ASSERT_ALIGNED_DOUBLE; + tmp7 = tmp3 + tmp6; + tmp14 = tmp10 + tmp13; + c_re(output[4 * ostride]) = tmp7 - tmp14; + c_re(output[0]) = tmp7 + tmp14; + tmp15 = tmp13 - tmp10; + tmp22 = tmp18 - tmp21; + c_im(output[2 * ostride]) = tmp15 + tmp22; + c_im(output[6 * ostride]) = tmp22 - tmp15; + } + { + 
fftw_real tmp51; + fftw_real tmp52; + fftw_real tmp47; + fftw_real tmp50; + ASSERT_ALIGNED_DOUBLE; + tmp51 = tmp18 + tmp21; + tmp52 = tmp48 + tmp49; + c_im(output[4 * ostride]) = tmp51 - tmp52; + c_im(output[0]) = tmp51 + tmp52; + tmp47 = tmp3 - tmp6; + tmp50 = tmp48 - tmp49; + c_re(output[6 * ostride]) = tmp47 - tmp50; + c_re(output[2 * ostride]) = tmp47 + tmp50; + } + { + fftw_real tmp25; + fftw_real tmp36; + fftw_real tmp39; + fftw_real tmp40; + ASSERT_ALIGNED_DOUBLE; + tmp25 = tmp23 + tmp24; + tmp36 = K707106781 * (tmp30 + tmp35); + c_re(output[5 * ostride]) = tmp25 - tmp36; + c_re(output[ostride]) = tmp25 + tmp36; + tmp39 = tmp37 + tmp38; + tmp40 = K707106781 * (tmp35 - tmp30); + c_im(output[7 * ostride]) = tmp39 - tmp40; + c_im(output[3 * ostride]) = tmp39 + tmp40; + } + { + fftw_real tmp45; + fftw_real tmp46; + fftw_real tmp41; + fftw_real tmp44; + ASSERT_ALIGNED_DOUBLE; + tmp45 = tmp38 - tmp37; + tmp46 = K707106781 * (tmp42 + tmp43); + c_im(output[5 * ostride]) = tmp45 - tmp46; + c_im(output[ostride]) = tmp45 + tmp46; + tmp41 = tmp23 - tmp24; + tmp44 = K707106781 * (tmp42 - tmp43); + c_re(output[7 * ostride]) = tmp41 - tmp44; + c_re(output[3 * ostride]) = tmp41 + tmp44; + } +} +/* Descriptor for the codelet above: name string, function address cast to void (*)(), then the tags 8, FFTW_FORWARD and FFTW_NOTW. The trailing fields (177, 0, null pointer) get their meaning from the fftw_codelet_desc declaration, which is not part of this file. */ +fftw_codelet_desc fftw_no_twiddle_8_desc = { + "fftw_no_twiddle_8", + (void (*)()) fftw_no_twiddle_8, + 8, + FFTW_FORWARD, + FFTW_NOTW, + 177, + 0, + (const int *) 0, +}; diff --git a/src/fftw/fn_9.c b/src/fftw/fn_9.c new file mode 100644 index 0000000..837ece7 --- /dev/null +++ b/src/fftw/fn_9.c @@ -0,0 +1,283 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Mon Mar 24 02:05:38 EST 2003 */ + +#include "fftw-int.h" +#include "fftw.h" + +/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddle 9 */ + +/* + * This function contains 80 FP additions, 40 FP multiplications, + * (or, 60 additions, 20 multiplications, 20 fused multiply/add), + * 30 stack variables, and 36 memory accesses + */ +static const fftw_real K939692620 = +FFTW_KONST(+0.939692620785908384054109277324731469936208134); +static const fftw_real K342020143 = +FFTW_KONST(+0.342020143325668733044099614682259580763083368); +static const fftw_real K984807753 = +FFTW_KONST(+0.984807753012208059366743024589523013670643252); +static const fftw_real K173648177 = +FFTW_KONST(+0.173648177666930348851716626769314796000375677); +static const fftw_real K642787609 = +FFTW_KONST(+0.642787609686539326322643409907263432907559884); +static const fftw_real K766044443 = +FFTW_KONST(+0.766044443118978035202392650555416673935832457); +static const fftw_real K866025403 = +FFTW_KONST(+0.866025403784438646763723170752936183471402627); +static const fftw_real K500000000 = +FFTW_KONST(+0.500000000000000000000000000000000000000000000); + +/* + * Generator Id's : + * $Id: fn_9.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fn_9.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fn_9.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + */ +/* fftw_no_twiddle_9: straight-line size-9 codelet (genfft -notwiddle 9; tagged FFTW_FORWARD / FFTW_NOTW in the descriptor below). It loads c_re and c_im of input[k * istride] for k = 0..8, all before the first store, combines them using the K* constants defined above, and stores c_re and c_im of output[k * ostride] for k = 0..8. NOTE(review): whether input and output may alias is not stated in this file; confirm against the callers. */ +void 
fftw_no_twiddle_9(const fftw_complex *input, fftw_complex *output, + int istride, int ostride) +{ + fftw_real tmp5; + fftw_real tmp17; + fftw_real tmp50; + fftw_real tmp20; + fftw_real tmp78; + fftw_real tmp53; + fftw_real tmp10; + fftw_real tmp26; + fftw_real tmp58; + fftw_real tmp74; + fftw_real tmp31; + fftw_real tmp59; + fftw_real tmp15; + fftw_real tmp37; + fftw_real tmp62; + fftw_real tmp75; + fftw_real tmp42; + fftw_real tmp61; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp1; + fftw_real tmp2; + fftw_real tmp3; + fftw_real tmp4; + ASSERT_ALIGNED_DOUBLE; + tmp1 = c_re(input[0]); + tmp2 = c_re(input[3 * istride]); + tmp3 = c_re(input[6 * istride]); + tmp4 = tmp2 + tmp3; + tmp5 = tmp1 + tmp4; + tmp17 = tmp1 - (K500000000 * tmp4); + tmp50 = K866025403 * (tmp3 - tmp2); + } + { + fftw_real tmp51; + fftw_real tmp18; + fftw_real tmp19; + fftw_real tmp52; + ASSERT_ALIGNED_DOUBLE; + tmp51 = c_im(input[0]); + tmp18 = c_im(input[3 * istride]); + tmp19 = c_im(input[6 * istride]); + tmp52 = tmp18 + tmp19; + tmp20 = K866025403 * (tmp18 - tmp19); + tmp78 = tmp51 + tmp52; + tmp53 = tmp51 - (K500000000 * tmp52); + } + { + fftw_real tmp6; + fftw_real tmp28; + fftw_real tmp9; + fftw_real tmp27; + fftw_real tmp25; + fftw_real tmp29; + fftw_real tmp22; + fftw_real tmp30; + ASSERT_ALIGNED_DOUBLE; + tmp6 = c_re(input[istride]); + tmp28 = c_im(input[istride]); + { + fftw_real tmp7; + fftw_real tmp8; + fftw_real tmp23; + fftw_real tmp24; + ASSERT_ALIGNED_DOUBLE; + tmp7 = c_re(input[4 * istride]); + tmp8 = c_re(input[7 * istride]); + tmp9 = tmp7 + tmp8; + tmp27 = K866025403 * (tmp8 - tmp7); + tmp23 = c_im(input[4 * istride]); + tmp24 = c_im(input[7 * istride]); + tmp25 = K866025403 * (tmp23 - tmp24); + tmp29 = tmp23 + tmp24; + } + tmp10 = tmp6 + tmp9; + tmp22 = tmp6 - (K500000000 * tmp9); + tmp26 = tmp22 + tmp25; + tmp58 = tmp22 - tmp25; + tmp74 = tmp28 + tmp29; + tmp30 = tmp28 - (K500000000 * tmp29); + tmp31 = tmp27 + tmp30; + tmp59 = tmp30 - tmp27; + } + { + fftw_real tmp11; + 
fftw_real tmp39; + fftw_real tmp14; + fftw_real tmp38; + fftw_real tmp36; + fftw_real tmp40; + fftw_real tmp33; + fftw_real tmp41; + ASSERT_ALIGNED_DOUBLE; + tmp11 = c_re(input[2 * istride]); + tmp39 = c_im(input[2 * istride]); + { + fftw_real tmp12; + fftw_real tmp13; + fftw_real tmp34; + fftw_real tmp35; + ASSERT_ALIGNED_DOUBLE; + tmp12 = c_re(input[5 * istride]); + tmp13 = c_re(input[8 * istride]); + tmp14 = tmp12 + tmp13; + tmp38 = K866025403 * (tmp13 - tmp12); + tmp34 = c_im(input[5 * istride]); + tmp35 = c_im(input[8 * istride]); + tmp36 = K866025403 * (tmp34 - tmp35); + tmp40 = tmp34 + tmp35; + } + tmp15 = tmp11 + tmp14; + tmp33 = tmp11 - (K500000000 * tmp14); + tmp37 = tmp33 + tmp36; + tmp62 = tmp33 - tmp36; + tmp75 = tmp39 + tmp40; + tmp41 = tmp39 - (K500000000 * tmp40); + tmp42 = tmp38 + tmp41; + tmp61 = tmp41 - tmp38; + } + { + fftw_real tmp76; + fftw_real tmp16; + fftw_real tmp73; + fftw_real tmp77; + fftw_real tmp79; + fftw_real tmp80; + ASSERT_ALIGNED_DOUBLE; + tmp76 = K866025403 * (tmp74 - tmp75); + tmp16 = tmp10 + tmp15; + tmp73 = tmp5 - (K500000000 * tmp16); + c_re(output[0]) = tmp5 + tmp16; + c_re(output[3 * ostride]) = tmp73 + tmp76; + c_re(output[6 * ostride]) = tmp73 - tmp76; + tmp77 = K866025403 * (tmp15 - tmp10); + tmp79 = tmp74 + tmp75; + tmp80 = tmp78 - (K500000000 * tmp79); + c_im(output[3 * ostride]) = tmp77 + tmp80; + c_im(output[6 * ostride]) = tmp80 - tmp77; + c_im(output[0]) = tmp78 + tmp79; + } + { + fftw_real tmp21; + fftw_real tmp54; + fftw_real tmp44; + fftw_real tmp49; + fftw_real tmp48; + fftw_real tmp55; + fftw_real tmp45; + fftw_real tmp56; + ASSERT_ALIGNED_DOUBLE; + tmp21 = tmp17 + tmp20; + tmp54 = tmp50 + tmp53; + { + fftw_real tmp32; + fftw_real tmp43; + fftw_real tmp46; + fftw_real tmp47; + ASSERT_ALIGNED_DOUBLE; + tmp32 = (K766044443 * tmp26) + (K642787609 * tmp31); + tmp43 = (K173648177 * tmp37) + (K984807753 * tmp42); + tmp44 = tmp32 + tmp43; + tmp49 = K866025403 * (tmp43 - tmp32); + tmp46 = (K766044443 * tmp31) - 
(K642787609 * tmp26); + tmp47 = (K173648177 * tmp42) - (K984807753 * tmp37); + tmp48 = K866025403 * (tmp46 - tmp47); + tmp55 = tmp46 + tmp47; + } + c_re(output[ostride]) = tmp21 + tmp44; + tmp45 = tmp21 - (K500000000 * tmp44); + c_re(output[7 * ostride]) = tmp45 - tmp48; + c_re(output[4 * ostride]) = tmp45 + tmp48; + c_im(output[ostride]) = tmp54 + tmp55; + tmp56 = tmp54 - (K500000000 * tmp55); + c_im(output[4 * ostride]) = tmp49 + tmp56; + c_im(output[7 * ostride]) = tmp56 - tmp49; + } + { + fftw_real tmp57; + fftw_real tmp69; + fftw_real tmp72; + fftw_real tmp66; + fftw_real tmp64; + fftw_real tmp65; + fftw_real tmp71; + fftw_real tmp70; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp67; + fftw_real tmp68; + fftw_real tmp60; + fftw_real tmp63; + ASSERT_ALIGNED_DOUBLE; + tmp57 = tmp17 - tmp20; + tmp67 = (K173648177 * tmp59) - (K984807753 * tmp58); + tmp68 = (K342020143 * tmp62) + (K939692620 * tmp61); + tmp69 = tmp67 - tmp68; + tmp72 = K866025403 * (tmp67 + tmp68); + tmp66 = tmp53 - tmp50; + tmp60 = (K173648177 * tmp58) + (K984807753 * tmp59); + tmp63 = (K342020143 * tmp61) - (K939692620 * tmp62); + tmp64 = tmp60 + tmp63; + tmp65 = K866025403 * (tmp63 - tmp60); + } + c_re(output[2 * ostride]) = tmp57 + tmp64; + tmp71 = tmp57 - (K500000000 * tmp64); + c_re(output[8 * ostride]) = tmp71 - tmp72; + c_re(output[5 * ostride]) = tmp71 + tmp72; + c_im(output[2 * ostride]) = tmp66 + tmp69; + tmp70 = tmp66 - (K500000000 * tmp69); + c_im(output[5 * ostride]) = tmp65 + tmp70; + c_im(output[8 * ostride]) = tmp70 - tmp65; + } +} +/* Descriptor for the codelet above: name string, function address cast to void (*)(), then the tags 9, FFTW_FORWARD and FFTW_NOTW. The trailing fields (199, 0, null pointer) get their meaning from the fftw_codelet_desc declaration, which is not part of this file. */ +fftw_codelet_desc fftw_no_twiddle_9_desc = { + "fftw_no_twiddle_9", + (void (*)()) fftw_no_twiddle_9, + 9, + FFTW_FORWARD, + FFTW_NOTW, + 199, + 0, + (const int *) 0, +}; diff --git a/src/fftw/fni_1.c b/src/fftw/fni_1.c new file mode 100644 index 0000000..b06ba0e --- /dev/null +++ b/src/fftw/fni_1.c @@ -0,0 +1,62 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it 
and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Mon Mar 24 02:06:18 EST 2003 */ + +#include "fftw-int.h" +#include "fftw.h" + +/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddleinv 1 */ + +/* + * This function contains 0 FP additions, 0 FP multiplications, + * (or, 0 additions, 0 multiplications, 0 fused multiply/add), + * 2 stack variables, and 4 memory accesses + */ + +/* + * Generator Id's : + * $Id: fni_1.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fni_1.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fni_1.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + */ +/* fftwi_no_twiddle_1: size-1 codelet (genfft -notwiddleinv 1; tagged FFTW_BACKWARD / FFTW_NOTW in the descriptor below). It copies c_re and c_im of input[0] to output[0] through two temporaries and performs no arithmetic; istride and ostride are accepted but never used. */ +void fftwi_no_twiddle_1(const fftw_complex *input, fftw_complex *output, + int istride, int ostride) +{ + fftw_real tmp1; + fftw_real tmp2; + ASSERT_ALIGNED_DOUBLE; + tmp1 = c_re(input[0]); + c_re(output[0]) = tmp1; + tmp2 = c_im(input[0]); + c_im(output[0]) = tmp2; +} +/* Descriptor for the codelet above: name string, function address cast to void (*)(), then the tags 1, FFTW_BACKWARD and FFTW_NOTW. The trailing fields (34, 0, null pointer) get their meaning from the fftw_codelet_desc declaration, which is not part of this file. */ +fftw_codelet_desc fftwi_no_twiddle_1_desc = { + "fftwi_no_twiddle_1", + (void (*)()) fftwi_no_twiddle_1, + 1, + FFTW_BACKWARD, + FFTW_NOTW, + 34, + 0, + (const int *) 0, +}; diff --git a/src/fftw/fni_10.c b/src/fftw/fni_10.c new file mode 100644 index 0000000..b197726 --- /dev/null +++ b/src/fftw/fni_10.c @@ -0,0 +1,285 @@ +/* + * Copyright (c) 
1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Mon Mar 24 02:06:28 EST 2003 */ + +#include "fftw-int.h" +#include "fftw.h" + +/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddleinv 10 */ + +/* + * This function contains 84 FP additions, 24 FP multiplications, + * (or, 72 additions, 12 multiplications, 12 fused multiply/add), + * 36 stack variables, and 40 memory accesses + */ +static const fftw_real K559016994 = +FFTW_KONST(+0.559016994374947424102293417182819058860154590); +static const fftw_real K250000000 = +FFTW_KONST(+0.250000000000000000000000000000000000000000000); +static const fftw_real K951056516 = +FFTW_KONST(+0.951056516295153572116439333379382143405698634); +static const fftw_real K587785252 = +FFTW_KONST(+0.587785252292473129168705954639072768597652438); + +/* + * Generator Id's : + * $Id: fni_10.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fni_10.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fni_10.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + */ +/* fftwi_no_twiddle_10: straight-line size-10 codelet (genfft -notwiddleinv 10; tagged FFTW_BACKWARD / FFTW_NOTW in the descriptor below). It loads c_re and c_im of input[k * istride] for k = 0..9, all before the first store, combines them using the four K* constants defined above, and stores c_re and c_im of output[k * ostride] for k = 0..9. NOTE(review): whether input and output may alias is not stated in this file; confirm against the callers. */ +void fftwi_no_twiddle_10(const fftw_complex *input, fftw_complex *output, + int istride, int ostride) 
+{ + fftw_real tmp3; + fftw_real tmp19; + fftw_real tmp67; + fftw_real tmp76; + fftw_real tmp59; + fftw_real tmp60; + fftw_real tmp10; + fftw_real tmp17; + fftw_real tmp18; + fftw_real tmp74; + fftw_real tmp73; + fftw_real tmp22; + fftw_real tmp25; + fftw_real tmp26; + fftw_real tmp36; + fftw_real tmp43; + fftw_real tmp62; + fftw_real tmp63; + fftw_real tmp68; + fftw_real tmp52; + fftw_real tmp55; + fftw_real tmp77; + fftw_real tmp78; + fftw_real tmp79; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp1; + fftw_real tmp2; + fftw_real tmp65; + fftw_real tmp66; + ASSERT_ALIGNED_DOUBLE; + tmp1 = c_re(input[0]); + tmp2 = c_re(input[5 * istride]); + tmp3 = tmp1 - tmp2; + tmp19 = tmp1 + tmp2; + tmp65 = c_im(input[0]); + tmp66 = c_im(input[5 * istride]); + tmp67 = tmp65 - tmp66; + tmp76 = tmp65 + tmp66; + } + { + fftw_real tmp6; + fftw_real tmp20; + fftw_real tmp16; + fftw_real tmp24; + fftw_real tmp9; + fftw_real tmp21; + fftw_real tmp13; + fftw_real tmp23; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp4; + fftw_real tmp5; + fftw_real tmp14; + fftw_real tmp15; + ASSERT_ALIGNED_DOUBLE; + tmp4 = c_re(input[2 * istride]); + tmp5 = c_re(input[7 * istride]); + tmp6 = tmp4 - tmp5; + tmp20 = tmp4 + tmp5; + tmp14 = c_re(input[6 * istride]); + tmp15 = c_re(input[istride]); + tmp16 = tmp14 - tmp15; + tmp24 = tmp14 + tmp15; + } + { + fftw_real tmp7; + fftw_real tmp8; + fftw_real tmp11; + fftw_real tmp12; + ASSERT_ALIGNED_DOUBLE; + tmp7 = c_re(input[8 * istride]); + tmp8 = c_re(input[3 * istride]); + tmp9 = tmp7 - tmp8; + tmp21 = tmp7 + tmp8; + tmp11 = c_re(input[4 * istride]); + tmp12 = c_re(input[9 * istride]); + tmp13 = tmp11 - tmp12; + tmp23 = tmp11 + tmp12; + } + tmp59 = tmp6 - tmp9; + tmp60 = tmp13 - tmp16; + tmp10 = tmp6 + tmp9; + tmp17 = tmp13 + tmp16; + tmp18 = tmp10 + tmp17; + tmp74 = tmp23 - tmp24; + tmp73 = tmp20 - tmp21; + tmp22 = tmp20 + tmp21; + tmp25 = tmp23 + tmp24; + tmp26 = tmp22 + tmp25; + } + { + fftw_real tmp32; + fftw_real tmp50; + fftw_real tmp42; + fftw_real tmp54; 
+ fftw_real tmp35; + fftw_real tmp51; + fftw_real tmp39; + fftw_real tmp53; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp30; + fftw_real tmp31; + fftw_real tmp40; + fftw_real tmp41; + ASSERT_ALIGNED_DOUBLE; + tmp30 = c_im(input[2 * istride]); + tmp31 = c_im(input[7 * istride]); + tmp32 = tmp30 - tmp31; + tmp50 = tmp30 + tmp31; + tmp40 = c_im(input[6 * istride]); + tmp41 = c_im(input[istride]); + tmp42 = tmp40 - tmp41; + tmp54 = tmp40 + tmp41; + } + { + fftw_real tmp33; + fftw_real tmp34; + fftw_real tmp37; + fftw_real tmp38; + ASSERT_ALIGNED_DOUBLE; + tmp33 = c_im(input[8 * istride]); + tmp34 = c_im(input[3 * istride]); + tmp35 = tmp33 - tmp34; + tmp51 = tmp33 + tmp34; + tmp37 = c_im(input[4 * istride]); + tmp38 = c_im(input[9 * istride]); + tmp39 = tmp37 - tmp38; + tmp53 = tmp37 + tmp38; + } + tmp36 = tmp32 - tmp35; + tmp43 = tmp39 - tmp42; + tmp62 = tmp32 + tmp35; + tmp63 = tmp39 + tmp42; + tmp68 = tmp62 + tmp63; + tmp52 = tmp50 - tmp51; + tmp55 = tmp53 - tmp54; + tmp77 = tmp50 + tmp51; + tmp78 = tmp53 + tmp54; + tmp79 = tmp77 + tmp78; + } + c_re(output[5 * ostride]) = tmp3 + tmp18; + { + fftw_real tmp44; + fftw_real tmp46; + fftw_real tmp29; + fftw_real tmp45; + fftw_real tmp27; + fftw_real tmp28; + ASSERT_ALIGNED_DOUBLE; + tmp44 = (K587785252 * tmp36) - (K951056516 * tmp43); + tmp46 = (K951056516 * tmp36) + (K587785252 * tmp43); + tmp27 = tmp3 - (K250000000 * tmp18); + tmp28 = K559016994 * (tmp10 - tmp17); + tmp29 = tmp27 - tmp28; + tmp45 = tmp28 + tmp27; + c_re(output[7 * ostride]) = tmp29 - tmp44; + c_re(output[3 * ostride]) = tmp29 + tmp44; + c_re(output[ostride]) = tmp45 - tmp46; + c_re(output[9 * ostride]) = tmp45 + tmp46; + } + c_re(output[0]) = tmp19 + tmp26; + { + fftw_real tmp56; + fftw_real tmp58; + fftw_real tmp49; + fftw_real tmp57; + fftw_real tmp47; + fftw_real tmp48; + ASSERT_ALIGNED_DOUBLE; + tmp56 = (K587785252 * tmp52) - (K951056516 * tmp55); + tmp58 = (K951056516 * tmp52) + (K587785252 * tmp55); + tmp47 = tmp19 - (K250000000 * tmp26); + tmp48 
= K559016994 * (tmp22 - tmp25); + tmp49 = tmp47 - tmp48; + tmp57 = tmp48 + tmp47; + c_re(output[2 * ostride]) = tmp49 - tmp56; + c_re(output[8 * ostride]) = tmp49 + tmp56; + c_re(output[6 * ostride]) = tmp57 - tmp58; + c_re(output[4 * ostride]) = tmp57 + tmp58; + } + c_im(output[5 * ostride]) = tmp68 + tmp67; + { + fftw_real tmp61; + fftw_real tmp72; + fftw_real tmp70; + fftw_real tmp71; + fftw_real tmp64; + fftw_real tmp69; + ASSERT_ALIGNED_DOUBLE; + tmp61 = (K951056516 * tmp59) + (K587785252 * tmp60); + tmp72 = (K587785252 * tmp59) - (K951056516 * tmp60); + tmp64 = K559016994 * (tmp62 - tmp63); + tmp69 = tmp67 - (K250000000 * tmp68); + tmp70 = tmp64 + tmp69; + tmp71 = tmp69 - tmp64; + c_im(output[ostride]) = tmp61 + tmp70; + c_im(output[9 * ostride]) = tmp70 - tmp61; + c_im(output[3 * ostride]) = tmp71 - tmp72; + c_im(output[7 * ostride]) = tmp72 + tmp71; + } + c_im(output[0]) = tmp79 + tmp76; + { + fftw_real tmp75; + fftw_real tmp84; + fftw_real tmp82; + fftw_real tmp83; + fftw_real tmp80; + fftw_real tmp81; + ASSERT_ALIGNED_DOUBLE; + tmp75 = (K587785252 * tmp73) - (K951056516 * tmp74); + tmp84 = (K951056516 * tmp73) + (K587785252 * tmp74); + tmp80 = tmp76 - (K250000000 * tmp79); + tmp81 = K559016994 * (tmp77 - tmp78); + tmp82 = tmp80 - tmp81; + tmp83 = tmp81 + tmp80; + c_im(output[2 * ostride]) = tmp75 + tmp82; + c_im(output[8 * ostride]) = tmp82 - tmp75; + c_im(output[4 * ostride]) = tmp83 - tmp84; + c_im(output[6 * ostride]) = tmp84 + tmp83; + } +} +/* Descriptor for the codelet above: name string, function address cast to void (*)(), then the tags 10, FFTW_BACKWARD and FFTW_NOTW. The trailing fields (232, 0, null pointer) get their meaning from the fftw_codelet_desc declaration, which is not part of this file. */ +fftw_codelet_desc fftwi_no_twiddle_10_desc = { + "fftwi_no_twiddle_10", + (void (*)()) fftwi_no_twiddle_10, + 10, + FFTW_BACKWARD, + FFTW_NOTW, + 232, + 0, + (const int *) 0, +}; diff --git a/src/fftw/fni_11.c b/src/fftw/fni_11.c new file mode 100644 index 0000000..89dcbbc --- /dev/null +++ b/src/fftw/fni_11.c @@ -0,0 +1,312 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU 
General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Mon Mar 24 02:06:29 EST 2003 */ + +#include "fftw-int.h" +#include "fftw.h" + +/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddleinv 11 */ + +/* + * This function contains 140 FP additions, 100 FP multiplications, + * (or, 140 additions, 100 multiplications, 0 fused multiply/add), + * 30 stack variables, and 44 memory accesses + */ +static const fftw_real K959492973 = +FFTW_KONST(+0.959492973614497389890368057066327699062454848); +static const fftw_real K654860733 = +FFTW_KONST(+0.654860733945285064056925072466293553183791199); +static const fftw_real K142314838 = +FFTW_KONST(+0.142314838273285140443792668616369668791051361); +static const fftw_real K415415013 = +FFTW_KONST(+0.415415013001886425529274149229623203524004910); +static const fftw_real K841253532 = +FFTW_KONST(+0.841253532831181168861811648919367717513292498); +static const fftw_real K540640817 = +FFTW_KONST(+0.540640817455597582107635954318691695431770608); +static const fftw_real K909631995 = +FFTW_KONST(+0.909631995354518371411715383079028460060241051); +static const fftw_real K281732556 = +FFTW_KONST(+0.281732556841429697711417915346616899035777899); +static const fftw_real K755749574 = 
+FFTW_KONST(+0.755749574354258283774035843972344420179717445); +static const fftw_real K989821441 = +FFTW_KONST(+0.989821441880932732376092037776718787376519372); + +/* + * Generator Id's : + * $Id: fni_11.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fni_11.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fni_11.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + */ +/* fftwi_no_twiddle_11: straight-line size-11 codelet (genfft -notwiddleinv 11; tagged FFTW_BACKWARD / FFTW_NOTW in the descriptor below). It loads c_re and c_im of input[k * istride] for k = 0..10, all before the first store, forms sums and differences of the pairs (1,10), (2,9), (3,8), (4,7), (5,6), and stores c_re and c_im of output[k * ostride] for k = 0..10 as weighted combinations of those values using the ten K* constants defined above. NOTE(review): whether input and output may alias is not stated in this file; confirm against the callers. */ +void fftwi_no_twiddle_11(const fftw_complex *input, fftw_complex *output, + int istride, int ostride) +{ + fftw_real tmp1; + fftw_real tmp23; + fftw_real tmp4; + fftw_real tmp17; + fftw_real tmp32; + fftw_real tmp51; + fftw_real tmp38; + fftw_real tmp53; + fftw_real tmp7; + fftw_real tmp21; + fftw_real tmp10; + fftw_real tmp18; + fftw_real tmp29; + fftw_real tmp50; + fftw_real tmp13; + fftw_real tmp19; + fftw_real tmp35; + fftw_real tmp49; + fftw_real tmp26; + fftw_real tmp52; + fftw_real tmp16; + fftw_real tmp20; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp2; + fftw_real tmp3; + fftw_real tmp30; + fftw_real tmp31; + ASSERT_ALIGNED_DOUBLE; + tmp1 = c_re(input[0]); + tmp23 = c_im(input[0]); + tmp2 = c_re(input[istride]); + tmp3 = c_re(input[10 * istride]); + tmp4 = tmp2 + tmp3; + tmp17 = tmp2 - tmp3; + tmp30 = c_im(input[istride]); + tmp31 = c_im(input[10 * istride]); + tmp32 = tmp30 + tmp31; + tmp51 = tmp31 - tmp30; + { + fftw_real tmp36; + fftw_real tmp37; + fftw_real tmp5; + fftw_real tmp6; + ASSERT_ALIGNED_DOUBLE; + tmp36 = c_im(input[2 * istride]); + tmp37 = c_im(input[9 * istride]); + tmp38 = tmp36 + tmp37; + tmp53 = tmp37 - tmp36; + tmp5 = c_re(input[2 * istride]); + tmp6 = c_re(input[9 * istride]); + tmp7 = tmp5 + tmp6; + tmp21 = tmp5 - tmp6; + } + } + { + fftw_real tmp8; + fftw_real tmp9; + fftw_real tmp33; + fftw_real tmp34; + ASSERT_ALIGNED_DOUBLE; + tmp8 = c_re(input[3 * istride]); + tmp9 = c_re(input[8 * istride]); + tmp10 = tmp8 + tmp9; + tmp18 = tmp8 - tmp9; + { + fftw_real tmp27; + fftw_real tmp28; + fftw_real tmp11; + fftw_real tmp12; + ASSERT_ALIGNED_DOUBLE; + tmp27 = c_im(input[3 * 
istride]); + tmp28 = c_im(input[8 * istride]); + tmp29 = tmp27 + tmp28; + tmp50 = tmp28 - tmp27; + tmp11 = c_re(input[4 * istride]); + tmp12 = c_re(input[7 * istride]); + tmp13 = tmp11 + tmp12; + tmp19 = tmp11 - tmp12; + } + tmp33 = c_im(input[4 * istride]); + tmp34 = c_im(input[7 * istride]); + tmp35 = tmp33 + tmp34; + tmp49 = tmp34 - tmp33; + { + fftw_real tmp24; + fftw_real tmp25; + fftw_real tmp14; + fftw_real tmp15; + ASSERT_ALIGNED_DOUBLE; + tmp24 = c_im(input[5 * istride]); + tmp25 = c_im(input[6 * istride]); + tmp26 = tmp24 + tmp25; + tmp52 = tmp25 - tmp24; + tmp14 = c_re(input[5 * istride]); + tmp15 = c_re(input[6 * istride]); + tmp16 = tmp14 + tmp15; + tmp20 = tmp14 - tmp15; + } + } + { + fftw_real tmp56; + fftw_real tmp55; + fftw_real tmp44; + fftw_real tmp45; + ASSERT_ALIGNED_DOUBLE; + c_re(output[0]) = tmp1 + tmp4 + tmp7 + tmp10 + tmp13 + tmp16; + { + fftw_real tmp62; + fftw_real tmp61; + fftw_real tmp58; + fftw_real tmp57; + ASSERT_ALIGNED_DOUBLE; + tmp62 = + (K989821441 * tmp52) + (K755749574 * tmp50) + + (K281732556 * tmp51) - (K909631995 * tmp49) - + (K540640817 * tmp53); + tmp61 = + tmp1 + (K841253532 * tmp7) + (K415415013 * tmp13) - + (K142314838 * tmp16) - (K654860733 * tmp10) - + (K959492973 * tmp4); + c_re(output[6 * ostride]) = tmp61 - tmp62; + c_re(output[5 * ostride]) = tmp61 + tmp62; + tmp58 = + (K909631995 * tmp53) + (K755749574 * tmp49) + + (K281732556 * tmp52) + (K989821441 * tmp50) + + (K540640817 * tmp51); + tmp57 = + tmp1 + (K841253532 * tmp4) + (K415415013 * tmp7) - + (K959492973 * tmp16) - (K654860733 * tmp13) - + (K142314838 * tmp10); + c_re(output[10 * ostride]) = tmp57 - tmp58; + c_re(output[ostride]) = tmp57 + tmp58; + } + tmp56 = + (K755749574 * tmp53) + (K909631995 * tmp51) - + (K281732556 * tmp50) - (K540640817 * tmp52) - + (K989821441 * tmp49); + tmp55 = + tmp1 + (K415415013 * tmp4) + (K841253532 * tmp16) - + (K142314838 * tmp13) - (K959492973 * tmp10) - + (K654860733 * tmp7); + c_re(output[9 * ostride]) = tmp55 - tmp56; + 
c_re(output[2 * ostride]) = tmp55 + tmp56; + { + fftw_real tmp60; + fftw_real tmp59; + fftw_real tmp54; + fftw_real tmp48; + ASSERT_ALIGNED_DOUBLE; + tmp60 = + (K540640817 * tmp49) + (K755749574 * tmp52) + + (K989821441 * tmp51) - (K909631995 * tmp50) - + (K281732556 * tmp53); + tmp59 = + tmp1 + (K415415013 * tmp10) + (K841253532 * tmp13) - + (K654860733 * tmp16) - (K959492973 * tmp7) - + (K142314838 * tmp4); + c_re(output[8 * ostride]) = tmp59 - tmp60; + c_re(output[3 * ostride]) = tmp59 + tmp60; + tmp54 = + (K281732556 * tmp49) + (K540640817 * tmp50) + + (K755749574 * tmp51) - (K909631995 * tmp52) - + (K989821441 * tmp53); + tmp48 = + tmp1 + (K841253532 * tmp10) + (K415415013 * tmp16) - + (K959492973 * tmp13) - (K142314838 * tmp7) - + (K654860733 * tmp4); + c_re(output[7 * ostride]) = tmp48 - tmp54; + c_re(output[4 * ostride]) = tmp48 + tmp54; + } + c_im(output[0]) = tmp23 + tmp38 + tmp35 + tmp26 + tmp29 + tmp32; + { + fftw_real tmp22; + fftw_real tmp39; + fftw_real tmp42; + fftw_real tmp43; + ASSERT_ALIGNED_DOUBLE; + tmp22 = + (K755749574 * tmp17) + (K540640817 * tmp18) + + (K281732556 * tmp19) - (K909631995 * tmp20) - + (K989821441 * tmp21); + tmp39 = + tmp23 + (K415415013 * tmp26) + (K841253532 * tmp29) - + (K654860733 * tmp32) - (K959492973 * tmp35) - + (K142314838 * tmp38); + c_im(output[4 * ostride]) = tmp22 + tmp39; + c_im(output[7 * ostride]) = tmp39 - tmp22; + tmp42 = + (K281732556 * tmp17) + (K755749574 * tmp18) + + (K989821441 * tmp20) - (K909631995 * tmp19) - + (K540640817 * tmp21); + tmp43 = + tmp23 + (K841253532 * tmp38) + (K415415013 * tmp35) - + (K959492973 * tmp32) - (K654860733 * tmp29) - + (K142314838 * tmp26); + c_im(output[5 * ostride]) = tmp42 + tmp43; + c_im(output[6 * ostride]) = tmp43 - tmp42; + } + tmp44 = + (K540640817 * tmp17) + (K909631995 * tmp21) + + (K989821441 * tmp18) + (K755749574 * tmp19) + + (K281732556 * tmp20); + tmp45 = + tmp23 + (K415415013 * tmp38) + (K841253532 * tmp32) - + (K142314838 * tmp29) - (K959492973 * tmp26) - + 
(K654860733 * tmp35); + c_im(output[ostride]) = tmp44 + tmp45; + c_im(output[10 * ostride]) = tmp45 - tmp44; + { + fftw_real tmp40; + fftw_real tmp41; + fftw_real tmp46; + fftw_real tmp47; + ASSERT_ALIGNED_DOUBLE; + tmp40 = + (K989821441 * tmp17) + (K540640817 * tmp19) + + (K755749574 * tmp20) - (K909631995 * tmp18) - + (K281732556 * tmp21); + tmp41 = + tmp23 + (K841253532 * tmp35) + (K415415013 * tmp29) - + (K142314838 * tmp32) - (K654860733 * tmp26) - + (K959492973 * tmp38); + c_im(output[3 * ostride]) = tmp40 + tmp41; + c_im(output[8 * ostride]) = tmp41 - tmp40; + tmp46 = + (K909631995 * tmp17) + (K755749574 * tmp21) - + (K540640817 * tmp20) - (K989821441 * tmp19) - + (K281732556 * tmp18); + tmp47 = + tmp23 + (K841253532 * tmp26) + (K415415013 * tmp32) - + (K959492973 * tmp29) - (K142314838 * tmp35) - + (K654860733 * tmp38); + c_im(output[2 * ostride]) = tmp46 + tmp47; + c_im(output[9 * ostride]) = tmp47 - tmp46; + } + } +} +/* Descriptor for the codelet above: name string, function address cast to void (*)(), then the tags 11, FFTW_BACKWARD and FFTW_NOTW. The trailing fields (254, 0, null pointer) get their meaning from the fftw_codelet_desc declaration, which is not part of this file. */ +fftw_codelet_desc fftwi_no_twiddle_11_desc = { + "fftwi_no_twiddle_11", + (void (*)()) fftwi_no_twiddle_11, + 11, + FFTW_BACKWARD, + FFTW_NOTW, + 254, + 0, + (const int *) 0, +}; diff --git a/src/fftw/fni_12.c b/src/fftw/fni_12.c new file mode 100644 index 0000000..889391b --- /dev/null +++ b/src/fftw/fni_12.c @@ -0,0 +1,324 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Mon Mar 24 02:06:36 EST 2003 */ + +#include "fftw-int.h" +#include "fftw.h" + +/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddleinv 12 */ + +/* + * This function contains 96 FP additions, 16 FP multiplications, + * (or, 88 additions, 8 multiplications, 8 fused multiply/add), + * 40 stack variables, and 48 memory accesses + */ +static const fftw_real K866025403 = +FFTW_KONST(+0.866025403784438646763723170752936183471402627); +static const fftw_real K500000000 = +FFTW_KONST(+0.500000000000000000000000000000000000000000000); + +/* + * Generator Id's : + * $Id: fni_12.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fni_12.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fni_12.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + */ + +void fftwi_no_twiddle_12(const fftw_complex *input, fftw_complex *output, + int istride, int ostride) +{ + fftw_real tmp5; + fftw_real tmp35; + fftw_real tmp57; + fftw_real tmp27; + fftw_real tmp58; + fftw_real tmp36; + fftw_real tmp10; + fftw_real tmp38; + fftw_real tmp60; + fftw_real tmp32; + fftw_real tmp61; + fftw_real tmp39; + fftw_real tmp16; + fftw_real tmp82; + fftw_real tmp42; + fftw_real tmp47; + fftw_real tmp76; + fftw_real tmp83; + fftw_real tmp21; + fftw_real tmp85; + fftw_real tmp49; + fftw_real tmp54; + fftw_real tmp77; + fftw_real tmp86; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp1; + fftw_real tmp2; + fftw_real tmp3; + fftw_real tmp4; + ASSERT_ALIGNED_DOUBLE; + tmp1 = c_re(input[0]); + tmp2 = c_re(input[4 * istride]); + tmp3 = c_re(input[8 * istride]); + tmp4 = tmp2 + tmp3; + tmp5 = tmp1 + tmp4; + tmp35 = tmp1 - (K500000000 * tmp4); + tmp57 
= K866025403 * (tmp2 - tmp3); + } + { + fftw_real tmp23; + fftw_real tmp24; + fftw_real tmp25; + fftw_real tmp26; + ASSERT_ALIGNED_DOUBLE; + tmp23 = c_im(input[0]); + tmp24 = c_im(input[4 * istride]); + tmp25 = c_im(input[8 * istride]); + tmp26 = tmp24 + tmp25; + tmp27 = tmp23 + tmp26; + tmp58 = tmp23 - (K500000000 * tmp26); + tmp36 = K866025403 * (tmp25 - tmp24); + } + { + fftw_real tmp6; + fftw_real tmp7; + fftw_real tmp8; + fftw_real tmp9; + ASSERT_ALIGNED_DOUBLE; + tmp6 = c_re(input[6 * istride]); + tmp7 = c_re(input[10 * istride]); + tmp8 = c_re(input[2 * istride]); + tmp9 = tmp7 + tmp8; + tmp10 = tmp6 + tmp9; + tmp38 = tmp6 - (K500000000 * tmp9); + tmp60 = K866025403 * (tmp7 - tmp8); + } + { + fftw_real tmp28; + fftw_real tmp29; + fftw_real tmp30; + fftw_real tmp31; + ASSERT_ALIGNED_DOUBLE; + tmp28 = c_im(input[6 * istride]); + tmp29 = c_im(input[10 * istride]); + tmp30 = c_im(input[2 * istride]); + tmp31 = tmp29 + tmp30; + tmp32 = tmp28 + tmp31; + tmp61 = tmp28 - (K500000000 * tmp31); + tmp39 = K866025403 * (tmp30 - tmp29); + } + { + fftw_real tmp12; + fftw_real tmp13; + fftw_real tmp14; + fftw_real tmp15; + ASSERT_ALIGNED_DOUBLE; + tmp12 = c_re(input[3 * istride]); + tmp13 = c_re(input[7 * istride]); + tmp14 = c_re(input[11 * istride]); + tmp15 = tmp13 + tmp14; + tmp16 = tmp12 + tmp15; + tmp82 = tmp12 - (K500000000 * tmp15); + tmp42 = K866025403 * (tmp13 - tmp14); + } + { + fftw_real tmp43; + fftw_real tmp44; + fftw_real tmp45; + fftw_real tmp46; + ASSERT_ALIGNED_DOUBLE; + tmp43 = c_im(input[3 * istride]); + tmp44 = c_im(input[7 * istride]); + tmp45 = c_im(input[11 * istride]); + tmp46 = tmp44 + tmp45; + tmp47 = tmp43 - (K500000000 * tmp46); + tmp76 = tmp43 + tmp46; + tmp83 = K866025403 * (tmp45 - tmp44); + } + { + fftw_real tmp17; + fftw_real tmp18; + fftw_real tmp19; + fftw_real tmp20; + ASSERT_ALIGNED_DOUBLE; + tmp17 = c_re(input[9 * istride]); + tmp18 = c_re(input[istride]); + tmp19 = c_re(input[5 * istride]); + tmp20 = tmp18 + tmp19; + tmp21 = tmp17 + 
tmp20; + tmp85 = tmp17 - (K500000000 * tmp20); + tmp49 = K866025403 * (tmp18 - tmp19); + } + { + fftw_real tmp50; + fftw_real tmp51; + fftw_real tmp52; + fftw_real tmp53; + ASSERT_ALIGNED_DOUBLE; + tmp50 = c_im(input[9 * istride]); + tmp51 = c_im(input[istride]); + tmp52 = c_im(input[5 * istride]); + tmp53 = tmp51 + tmp52; + tmp54 = tmp50 - (K500000000 * tmp53); + tmp77 = tmp50 + tmp53; + tmp86 = K866025403 * (tmp52 - tmp51); + } + { + fftw_real tmp11; + fftw_real tmp22; + fftw_real tmp33; + fftw_real tmp34; + ASSERT_ALIGNED_DOUBLE; + tmp11 = tmp5 + tmp10; + tmp22 = tmp16 + tmp21; + c_re(output[6 * ostride]) = tmp11 - tmp22; + c_re(output[0]) = tmp11 + tmp22; + { + fftw_real tmp75; + fftw_real tmp78; + fftw_real tmp79; + fftw_real tmp80; + ASSERT_ALIGNED_DOUBLE; + tmp75 = tmp5 - tmp10; + tmp78 = tmp76 - tmp77; + c_re(output[9 * ostride]) = tmp75 - tmp78; + c_re(output[3 * ostride]) = tmp75 + tmp78; + tmp79 = tmp27 + tmp32; + tmp80 = tmp76 + tmp77; + c_im(output[6 * ostride]) = tmp79 - tmp80; + c_im(output[0]) = tmp79 + tmp80; + } + tmp33 = tmp27 - tmp32; + tmp34 = tmp16 - tmp21; + c_im(output[3 * ostride]) = tmp33 - tmp34; + c_im(output[9 * ostride]) = tmp34 + tmp33; + { + fftw_real tmp67; + fftw_real tmp89; + fftw_real tmp88; + fftw_real tmp90; + fftw_real tmp70; + fftw_real tmp74; + fftw_real tmp73; + fftw_real tmp81; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp65; + fftw_real tmp66; + fftw_real tmp84; + fftw_real tmp87; + ASSERT_ALIGNED_DOUBLE; + tmp65 = tmp35 - tmp36; + tmp66 = tmp38 - tmp39; + tmp67 = tmp65 - tmp66; + tmp89 = tmp65 + tmp66; + tmp84 = tmp82 - tmp83; + tmp87 = tmp85 - tmp86; + tmp88 = tmp84 - tmp87; + tmp90 = tmp84 + tmp87; + } + { + fftw_real tmp68; + fftw_real tmp69; + fftw_real tmp71; + fftw_real tmp72; + ASSERT_ALIGNED_DOUBLE; + tmp68 = tmp47 - tmp42; + tmp69 = tmp54 - tmp49; + tmp70 = tmp68 - tmp69; + tmp74 = tmp68 + tmp69; + tmp71 = tmp58 - tmp57; + tmp72 = tmp61 - tmp60; + tmp73 = tmp71 + tmp72; + tmp81 = tmp71 - tmp72; + } + 
c_re(output[5 * ostride]) = tmp67 - tmp70; + c_re(output[11 * ostride]) = tmp67 + tmp70; + c_im(output[2 * ostride]) = tmp73 - tmp74; + c_im(output[8 * ostride]) = tmp73 + tmp74; + c_im(output[11 * ostride]) = tmp81 - tmp88; + c_im(output[5 * ostride]) = tmp81 + tmp88; + c_re(output[2 * ostride]) = tmp89 - tmp90; + c_re(output[8 * ostride]) = tmp89 + tmp90; + } + { + fftw_real tmp41; + fftw_real tmp95; + fftw_real tmp94; + fftw_real tmp96; + fftw_real tmp56; + fftw_real tmp64; + fftw_real tmp63; + fftw_real tmp91; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp37; + fftw_real tmp40; + fftw_real tmp92; + fftw_real tmp93; + ASSERT_ALIGNED_DOUBLE; + tmp37 = tmp35 + tmp36; + tmp40 = tmp38 + tmp39; + tmp41 = tmp37 - tmp40; + tmp95 = tmp37 + tmp40; + tmp92 = tmp82 + tmp83; + tmp93 = tmp85 + tmp86; + tmp94 = tmp92 - tmp93; + tmp96 = tmp92 + tmp93; + } + { + fftw_real tmp48; + fftw_real tmp55; + fftw_real tmp59; + fftw_real tmp62; + ASSERT_ALIGNED_DOUBLE; + tmp48 = tmp42 + tmp47; + tmp55 = tmp49 + tmp54; + tmp56 = tmp48 - tmp55; + tmp64 = tmp48 + tmp55; + tmp59 = tmp57 + tmp58; + tmp62 = tmp60 + tmp61; + tmp63 = tmp59 + tmp62; + tmp91 = tmp59 - tmp62; + } + c_re(output[ostride]) = tmp41 - tmp56; + c_re(output[7 * ostride]) = tmp41 + tmp56; + c_im(output[10 * ostride]) = tmp63 - tmp64; + c_im(output[4 * ostride]) = tmp63 + tmp64; + c_im(output[7 * ostride]) = tmp91 - tmp94; + c_im(output[ostride]) = tmp91 + tmp94; + c_re(output[10 * ostride]) = tmp95 - tmp96; + c_re(output[4 * ostride]) = tmp95 + tmp96; + } + } +} + +/* + * Descriptor record for fftwi_no_twiddle_12: it carries the codelet name + * string, the function pointer (cast to a generic function pointer), the + * value 12, FFTW_BACKWARD, FFTW_NOTW, the value 276, a zero and a null + * int pointer. + * NOTE(review): the field meanings are fixed by fftw_codelet_desc, which is + * declared outside this file; 12 is presumably the transform size and 276 + * an identifying signature -- confirm against the struct declaration. + */ fftw_codelet_desc fftwi_no_twiddle_12_desc = { + "fftwi_no_twiddle_12", + (void (*)()) fftwi_no_twiddle_12, + 12, + FFTW_BACKWARD, + FFTW_NOTW, + 276, + 0, + (const int *) 0, +}; diff --git a/src/fftw/fni_13.c b/src/fftw/fni_13.c new file mode 100644 index 0000000..424a6c3 --- /dev/null +++ b/src/fftw/fni_13.c @@ -0,0 +1,546 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it 
and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Mon Mar 24 02:06:37 EST 2003 */ + +#include "fftw-int.h" +#include "fftw.h" + +/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddleinv 13 */ + +/* + * fftwi_no_twiddle_13, defined further down, is the size-13 backward + * no-twiddle codelet emitted by the genfft command line quoted above. + * The K... constants that follow are named after the leading decimal + * digits of their values (K500000000 is 0.5, K1_732050807 is 1.732...). + * + * This function contains 176 FP additions, 68 FP multiplications, + * (or, 138 additions, 30 multiplications, 38 fused multiply/add), + * 50 stack variables, and 52 memory accesses + */ +static const fftw_real K1_732050807 = +FFTW_KONST(+1.732050807568877293527446341505872366942805254); +static const fftw_real K156891391 = +FFTW_KONST(+0.156891391051584611046832726756003269660212636); +static const fftw_real K256247671 = +FFTW_KONST(+0.256247671582936600958684654061725059144125175); +static const fftw_real K300238635 = +FFTW_KONST(+0.300238635966332641462884626667381504676006424); +static const fftw_real K011599105 = +FFTW_KONST(+0.011599105605768290721655456654083252189827041); +static const fftw_real K174138601 = +FFTW_KONST(+0.174138601152135905005660794929264742616964676); +static const fftw_real K575140729 = +FFTW_KONST(+0.575140729474003121368385547455453388461001608); +static const fftw_real K2_000000000 = +FFTW_KONST(+2.000000000000000000000000000000000000000000000); +static const 
fftw_real K083333333 = +FFTW_KONST(+0.083333333333333333333333333333333333333333333); +static const fftw_real K075902986 = +FFTW_KONST(+0.075902986037193865983102897245103540356428373); +static const fftw_real K251768516 = +FFTW_KONST(+0.251768516431883313623436926934233488546674281); +static const fftw_real K258260390 = +FFTW_KONST(+0.258260390311744861420450644284508567852516811); +static const fftw_real K132983124 = +FFTW_KONST(+0.132983124607418643793760531921092974399165133); +static const fftw_real K503537032 = +FFTW_KONST(+0.503537032863766627246873853868466977093348562); +static const fftw_real K113854479 = +FFTW_KONST(+0.113854479055790798974654345867655310534642560); +static const fftw_real K265966249 = +FFTW_KONST(+0.265966249214837287587521063842185948798330267); +static const fftw_real K387390585 = +FFTW_KONST(+0.387390585467617292130675966426762851778775217); +static const fftw_real K866025403 = +FFTW_KONST(+0.866025403784438646763723170752936183471402627); +static const fftw_real K300462606 = +FFTW_KONST(+0.300462606288665774426601772289207995520941381); +static const fftw_real K500000000 = +FFTW_KONST(+0.500000000000000000000000000000000000000000000); + +/* + * Generator Id's : + * $Id: fni_13.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fni_13.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fni_13.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + */ + +/* + * Straight-line 13-point complex transform. + * + * input   : 13 complex values, read at input[k * istride] for k = 0..12 + * output  : 13 complex results, stored at output[k * ostride] for k = 0..12 + * istride, ostride : element strides, counted in fftw_complex items + * + * Every load from input is done before the first store to output. + * output[0] is the plain sum of the 13 inputs; no 1/13 factor is applied. + * NOTE(review): the transform direction is taken from the function name, + * the -notwiddleinv generator flag and FFTW_BACKWARD in the descriptor; + * the exact sign convention is defined outside this file -- confirm. + */ void fftwi_no_twiddle_13(const fftw_complex *input, fftw_complex *output, + int istride, int ostride) +{ + fftw_real tmp1; + fftw_real tmp146; + fftw_real tmp113; + fftw_real tmp24; + fftw_real tmp38; + fftw_real tmp36; + fftw_real tmp41; + fftw_real tmp116; + fftw_real tmp120; + fftw_real tmp125; + fftw_real tmp31; + fftw_real tmp40; + fftw_real tmp123; + fftw_real tmp126; + fftw_real tmp56; + fftw_real tmp80; + fftw_real tmp82; + fftw_real tmp137; + fftw_real tmp144; + fftw_real tmp67; + fftw_real tmp141; + fftw_real tmp147; + fftw_real tmp134; + fftw_real tmp143; + fftw_real tmp75; + fftw_real tmp83; 
+ ASSERT_ALIGNED_DOUBLE; + tmp1 = c_re(input[0]); + tmp146 = c_im(input[0]); + { /* real parts of inputs 1..12: load and combine */ + fftw_real tmp15; + fftw_real tmp114; + fftw_real tmp18; + fftw_real tmp26; + fftw_real tmp21; + fftw_real tmp25; + fftw_real tmp22; + fftw_real tmp115; + fftw_real tmp6; + fftw_real tmp32; + fftw_real tmp28; + fftw_real tmp11; + fftw_real tmp33; + fftw_real tmp29; + fftw_real tmp13; + fftw_real tmp14; + fftw_real tmp118; + fftw_real tmp119; + ASSERT_ALIGNED_DOUBLE; + tmp13 = c_re(input[8 * istride]); + tmp14 = c_re(input[5 * istride]); + tmp15 = tmp13 + tmp14; + tmp114 = tmp13 - tmp14; + { + fftw_real tmp16; + fftw_real tmp17; + fftw_real tmp19; + fftw_real tmp20; + ASSERT_ALIGNED_DOUBLE; + tmp16 = c_re(input[6 * istride]); + tmp17 = c_re(input[11 * istride]); + tmp18 = tmp16 + tmp17; + tmp26 = tmp16 - tmp17; + tmp19 = c_re(input[2 * istride]); + tmp20 = c_re(input[7 * istride]); + tmp21 = tmp19 + tmp20; + tmp25 = tmp19 - tmp20; + } + tmp22 = tmp18 + tmp21; + tmp115 = tmp26 + tmp25; + { + fftw_real tmp2; + fftw_real tmp3; + fftw_real tmp4; + fftw_real tmp5; + ASSERT_ALIGNED_DOUBLE; + tmp2 = c_re(input[istride]); + tmp3 = c_re(input[3 * istride]); + tmp4 = c_re(input[9 * istride]); + tmp5 = tmp3 + tmp4; + tmp6 = tmp2 + tmp5; + tmp32 = tmp2 - (K500000000 * tmp5); + tmp28 = tmp3 - tmp4; + } + { + fftw_real tmp7; + fftw_real tmp8; + fftw_real tmp9; + fftw_real tmp10; + ASSERT_ALIGNED_DOUBLE; + tmp7 = c_re(input[12 * istride]); + tmp8 = c_re(input[4 * istride]); + tmp9 = c_re(input[10 * istride]); + tmp10 = tmp8 + tmp9; + tmp11 = tmp7 + tmp10; + tmp33 = tmp7 - (K500000000 * tmp10); + tmp29 = tmp8 - tmp9; + } + tmp113 = tmp6 - tmp11; + { + fftw_real tmp12; + fftw_real tmp23; + fftw_real tmp34; + fftw_real tmp35; + ASSERT_ALIGNED_DOUBLE; + tmp12 = tmp6 + tmp11; + tmp23 = tmp15 + tmp22; + tmp24 = tmp12 + tmp23; + tmp38 = K300462606 * (tmp12 - tmp23); + tmp34 = tmp32 + tmp33; + tmp35 = tmp15 - (K500000000 * tmp22); + tmp36 = tmp34 - tmp35; + tmp41 = tmp34 + tmp35; + } + tmp116 = tmp114 - 
tmp115; + tmp118 = K866025403 * (tmp28 + tmp29); + tmp119 = tmp114 + (K500000000 * tmp115); + tmp120 = tmp118 + tmp119; + tmp125 = tmp119 - tmp118; + { + fftw_real tmp27; + fftw_real tmp30; + fftw_real tmp121; + fftw_real tmp122; + ASSERT_ALIGNED_DOUBLE; + tmp27 = tmp25 - tmp26; + tmp30 = tmp28 - tmp29; + tmp31 = tmp27 - tmp30; + tmp40 = tmp30 + tmp27; + tmp121 = tmp32 - tmp33; + tmp122 = K866025403 * (tmp18 - tmp21); + tmp123 = tmp121 - tmp122; + tmp126 = tmp121 + tmp122; + } + } + { /* imaginary parts of inputs 1..12: load and combine */ + fftw_real tmp48; + fftw_real tmp131; + fftw_real tmp66; + fftw_real tmp70; + fftw_real tmp77; + fftw_real tmp61; + fftw_real tmp69; + fftw_real tmp76; + fftw_real tmp51; + fftw_real tmp73; + fftw_real tmp54; + fftw_real tmp72; + fftw_real tmp55; + fftw_real tmp132; + fftw_real tmp46; + fftw_real tmp47; + fftw_real tmp139; + fftw_real tmp140; + ASSERT_ALIGNED_DOUBLE; + tmp46 = c_im(input[8 * istride]); + tmp47 = c_im(input[5 * istride]); + tmp48 = tmp46 - tmp47; + tmp131 = tmp46 + tmp47; + { + fftw_real tmp62; + fftw_real tmp63; + fftw_real tmp64; + fftw_real tmp65; + ASSERT_ALIGNED_DOUBLE; + tmp62 = c_im(input[12 * istride]); + tmp63 = c_im(input[4 * istride]); + tmp64 = c_im(input[10 * istride]); + tmp65 = tmp63 + tmp64; + tmp66 = tmp62 + tmp65; + tmp70 = tmp62 - (K500000000 * tmp65); + tmp77 = tmp63 - tmp64; + } + { + fftw_real tmp57; + fftw_real tmp58; + fftw_real tmp59; + fftw_real tmp60; + ASSERT_ALIGNED_DOUBLE; + tmp57 = c_im(input[istride]); + tmp58 = c_im(input[3 * istride]); + tmp59 = c_im(input[9 * istride]); + tmp60 = tmp58 + tmp59; + tmp61 = tmp57 + tmp60; + tmp69 = tmp57 - (K500000000 * tmp60); + tmp76 = tmp58 - tmp59; + } + { + fftw_real tmp49; + fftw_real tmp50; + fftw_real tmp52; + fftw_real tmp53; + ASSERT_ALIGNED_DOUBLE; + tmp49 = c_im(input[6 * istride]); + tmp50 = c_im(input[11 * istride]); + tmp51 = tmp49 - tmp50; + tmp73 = tmp49 + tmp50; + tmp52 = c_im(input[2 * istride]); + tmp53 = c_im(input[7 * istride]); + tmp54 = tmp52 - tmp53; + tmp72 = tmp52 + tmp53; + } 
+ tmp55 = tmp51 + tmp54; + tmp132 = tmp73 + tmp72; + tmp56 = tmp48 - tmp55; + { + fftw_real tmp78; + fftw_real tmp79; + fftw_real tmp135; + fftw_real tmp136; + ASSERT_ALIGNED_DOUBLE; + tmp78 = K866025403 * (tmp76 + tmp77); + tmp79 = tmp48 + (K500000000 * tmp55); + tmp80 = tmp78 - tmp79; + tmp82 = tmp78 + tmp79; + tmp135 = tmp51 - tmp54; + tmp136 = tmp77 - tmp76; + tmp137 = tmp135 - tmp136; + tmp144 = tmp136 + tmp135; + } + tmp67 = tmp61 - tmp66; + tmp139 = tmp61 + tmp66; + tmp140 = tmp131 + tmp132; + tmp141 = K300462606 * (tmp139 - tmp140); + tmp147 = tmp139 + tmp140; + { + fftw_real tmp130; + fftw_real tmp133; + fftw_real tmp71; + fftw_real tmp74; + ASSERT_ALIGNED_DOUBLE; + tmp130 = tmp69 + tmp70; + tmp133 = tmp131 - (K500000000 * tmp132); + tmp134 = tmp130 - tmp133; + tmp143 = tmp130 + tmp133; + tmp71 = tmp69 - tmp70; + tmp74 = K866025403 * (tmp72 - tmp73); + tmp75 = tmp71 - tmp74; + tmp83 = tmp71 + tmp74; + } + } + c_re(output[0]) = tmp1 + tmp24; /* sum of the 13 real parts */ + { /* real parts of outputs 1..12 */ + fftw_real tmp100; + fftw_real tmp108; + fftw_real tmp39; + fftw_real tmp105; + fftw_real tmp95; + fftw_real tmp44; + fftw_real tmp68; + fftw_real tmp85; + fftw_real tmp96; + fftw_real tmp106; + fftw_real tmp88; + fftw_real tmp91; + fftw_real tmp101; + fftw_real tmp109; + fftw_real tmp98; + fftw_real tmp99; + ASSERT_ALIGNED_DOUBLE; + tmp98 = (K387390585 * tmp31) - (K265966249 * tmp36); + tmp99 = (K113854479 * tmp40) - (K503537032 * tmp41); + tmp100 = tmp98 + tmp99; + tmp108 = tmp99 - tmp98; + { + fftw_real tmp37; + fftw_real tmp94; + fftw_real tmp42; + fftw_real tmp43; + fftw_real tmp93; + ASSERT_ALIGNED_DOUBLE; + tmp37 = (K132983124 * tmp31) + (K258260390 * tmp36); + tmp94 = tmp38 - tmp37; + tmp42 = (K251768516 * tmp40) + (K075902986 * tmp41); + tmp43 = tmp1 - (K083333333 * tmp24); + tmp93 = tmp43 - tmp42; + tmp39 = (K2_000000000 * tmp37) + tmp38; + tmp105 = tmp94 + tmp93; + tmp95 = tmp93 - tmp94; + tmp44 = (K2_000000000 * tmp42) + tmp43; + } + { + fftw_real tmp81; + fftw_real tmp84; + fftw_real tmp89; + fftw_real 
tmp90; + ASSERT_ALIGNED_DOUBLE; + tmp68 = (K575140729 * tmp56) - (K174138601 * tmp67); + tmp81 = (K011599105 * tmp75) - (K300238635 * tmp80); + tmp84 = (K256247671 * tmp82) + (K156891391 * tmp83); + tmp85 = tmp81 - tmp84; + tmp96 = K1_732050807 * (tmp81 + tmp84); + tmp106 = tmp68 - tmp85; + tmp88 = (K575140729 * tmp67) + (K174138601 * tmp56); + tmp89 = (K256247671 * tmp83) - (K156891391 * tmp82); + tmp90 = (K011599105 * tmp80) + (K300238635 * tmp75); + tmp91 = tmp89 - tmp90; + tmp101 = tmp88 + tmp91; + tmp109 = K1_732050807 * (tmp90 + tmp89); + } + { + fftw_real tmp45; + fftw_real tmp86; + fftw_real tmp103; + fftw_real tmp104; + ASSERT_ALIGNED_DOUBLE; + tmp45 = tmp39 + tmp44; + tmp86 = tmp68 + (K2_000000000 * tmp85); + c_re(output[12 * ostride]) = tmp45 - tmp86; + c_re(output[ostride]) = tmp45 + tmp86; + { + fftw_real tmp87; + fftw_real tmp92; + fftw_real tmp97; + fftw_real tmp102; + ASSERT_ALIGNED_DOUBLE; + tmp87 = tmp44 - tmp39; + tmp92 = tmp88 - (K2_000000000 * tmp91); + c_re(output[5 * ostride]) = tmp87 - tmp92; + c_re(output[8 * ostride]) = tmp87 + tmp92; + tmp97 = tmp95 - tmp96; + tmp102 = tmp100 + tmp101; + c_re(output[2 * ostride]) = tmp97 - tmp102; + c_re(output[7 * ostride]) = tmp97 + tmp102; + } + tmp103 = tmp95 + tmp96; + tmp104 = tmp101 - tmp100; + c_re(output[6 * ostride]) = tmp103 - tmp104; + c_re(output[11 * ostride]) = tmp103 + tmp104; + { + fftw_real tmp111; + fftw_real tmp112; + fftw_real tmp107; + fftw_real tmp110; + ASSERT_ALIGNED_DOUBLE; + tmp111 = tmp105 - tmp106; + tmp112 = tmp109 - tmp108; + c_re(output[4 * ostride]) = tmp111 - tmp112; + c_re(output[10 * ostride]) = tmp111 + tmp112; + tmp107 = tmp105 + tmp106; + tmp110 = tmp108 + tmp109; + c_re(output[3 * ostride]) = tmp107 - tmp110; + c_re(output[9 * ostride]) = tmp107 + tmp110; + } + } + } + c_im(output[0]) = tmp147 + tmp146; /* sum of the 13 imaginary parts */ + { /* imaginary parts of outputs 1..12 */ + fftw_real tmp160; + fftw_real tmp173; + fftw_real tmp142; + fftw_real tmp170; + fftw_real tmp165; + fftw_real tmp149; + fftw_real tmp117; + fftw_real tmp128; + 
fftw_real tmp162; + fftw_real tmp169; + fftw_real tmp151; + fftw_real tmp154; + fftw_real tmp157; + fftw_real tmp172; + fftw_real tmp158; + fftw_real tmp159; + ASSERT_ALIGNED_DOUBLE; + tmp158 = (K387390585 * tmp137) + (K265966249 * tmp134); + tmp159 = (K113854479 * tmp144) + (K503537032 * tmp143); + tmp160 = tmp158 + tmp159; + tmp173 = tmp158 - tmp159; + { + fftw_real tmp138; + fftw_real tmp164; + fftw_real tmp145; + fftw_real tmp148; + fftw_real tmp163; + ASSERT_ALIGNED_DOUBLE; + tmp138 = (K258260390 * tmp134) - (K132983124 * tmp137); + tmp164 = tmp141 - tmp138; + tmp145 = (K075902986 * tmp143) - (K251768516 * tmp144); + tmp148 = tmp146 - (K083333333 * tmp147); + tmp163 = tmp148 - tmp145; + tmp142 = (K2_000000000 * tmp138) + tmp141; + tmp170 = tmp164 + tmp163; + tmp165 = tmp163 - tmp164; + tmp149 = (K2_000000000 * tmp145) + tmp148; + } + { + fftw_real tmp124; + fftw_real tmp127; + fftw_real tmp152; + fftw_real tmp153; + ASSERT_ALIGNED_DOUBLE; + tmp117 = (K174138601 * tmp113) - (K575140729 * tmp116); + tmp124 = (K256247671 * tmp120) + (K156891391 * tmp123); + tmp127 = (K300238635 * tmp125) + (K011599105 * tmp126); + tmp128 = tmp124 - tmp127; + tmp162 = K1_732050807 * (tmp124 + tmp127); + tmp169 = tmp117 - tmp128; + tmp151 = (K575140729 * tmp113) + (K174138601 * tmp116); + tmp152 = (K256247671 * tmp123) - (K156891391 * tmp120); + tmp153 = (K011599105 * tmp125) - (K300238635 * tmp126); + tmp154 = tmp152 + tmp153; + tmp157 = tmp151 + tmp154; + tmp172 = K1_732050807 * (tmp153 - tmp152); + } + { + fftw_real tmp129; + fftw_real tmp150; + fftw_real tmp167; + fftw_real tmp168; + ASSERT_ALIGNED_DOUBLE; + tmp129 = tmp117 + (K2_000000000 * tmp128); + tmp150 = tmp142 + tmp149; + c_im(output[ostride]) = tmp129 + tmp150; + c_im(output[12 * ostride]) = tmp150 - tmp129; + { + fftw_real tmp155; + fftw_real tmp156; + fftw_real tmp161; + fftw_real tmp166; + ASSERT_ALIGNED_DOUBLE; + tmp155 = tmp151 - (K2_000000000 * tmp154); + tmp156 = tmp149 - tmp142; + c_im(output[5 * ostride]) = 
tmp155 + tmp156; + c_im(output[8 * ostride]) = tmp156 - tmp155; + tmp161 = tmp157 + tmp160; + tmp166 = tmp162 + tmp165; + c_im(output[2 * ostride]) = tmp161 + tmp166; + c_im(output[7 * ostride]) = tmp166 - tmp161; + } + tmp167 = tmp165 - tmp162; + tmp168 = tmp160 - tmp157; + c_im(output[6 * ostride]) = tmp167 - tmp168; + c_im(output[11 * ostride]) = tmp168 + tmp167; + { + fftw_real tmp175; + fftw_real tmp176; + fftw_real tmp171; + fftw_real tmp174; + ASSERT_ALIGNED_DOUBLE; + tmp175 = tmp170 - tmp169; + tmp176 = tmp172 - tmp173; + c_im(output[4 * ostride]) = tmp175 - tmp176; + c_im(output[10 * ostride]) = tmp176 + tmp175; + tmp171 = tmp169 + tmp170; + tmp174 = tmp172 + tmp173; + c_im(output[3 * ostride]) = tmp171 - tmp174; + c_im(output[9 * ostride]) = tmp174 + tmp171; + } + } + } +} + +/* + * Descriptor record for fftwi_no_twiddle_13: name string, function pointer + * (cast to a generic function pointer), 13, FFTW_BACKWARD, FFTW_NOTW, 298, + * a zero and a null int pointer. + * NOTE(review): field meanings come from fftw_codelet_desc, declared + * outside this file; 13 is presumably the size and 298 an identifying + * signature -- confirm against the struct declaration. + */ fftw_codelet_desc fftwi_no_twiddle_13_desc = { + "fftwi_no_twiddle_13", + (void (*)()) fftwi_no_twiddle_13, + 13, + FFTW_BACKWARD, + FFTW_NOTW, + 298, + 0, + (const int *) 0, +}; diff --git a/src/fftw/fni_14.c b/src/fftw/fni_14.c new file mode 100644 index 0000000..61f5f89 --- /dev/null +++ b/src/fftw/fni_14.c @@ -0,0 +1,409 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Mon Mar 24 02:06:45 EST 2003 */ + +#include "fftw-int.h" +#include "fftw.h" + +/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddleinv 14 */ + +/* + * fftwi_no_twiddle_14, defined further down, is the size-14 backward + * no-twiddle codelet emitted by the genfft command line quoted above. + * The K... constants that follow are named after the leading decimal + * digits of their values (K900968867 is 0.900968867...). + * + * This function contains 148 FP additions, 72 FP multiplications, + * (or, 148 additions, 72 multiplications, 0 fused multiply/add), + * 36 stack variables, and 56 memory accesses + */ +static const fftw_real K900968867 = +FFTW_KONST(+0.900968867902419126236102319507445051165919162); +static const fftw_real K222520933 = +FFTW_KONST(+0.222520933956314404288902564496794759466355569); +static const fftw_real K623489801 = +FFTW_KONST(+0.623489801858733530525004884004239810632274731); +static const fftw_real K781831482 = +FFTW_KONST(+0.781831482468029808708444526674057750232334519); +static const fftw_real K974927912 = +FFTW_KONST(+0.974927912181823607018131682993931217232785801); +static const fftw_real K433883739 = +FFTW_KONST(+0.433883739117558120475768332848358754609990728); + +/* + * Generator Id's : + * $Id: fni_14.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fni_14.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fni_14.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + */ + +/* + * Straight-line 14-point complex transform. + * + * input   : 14 complex values, read at input[k * istride] for k = 0..13 + * output  : 14 complex results, stored at output[k * ostride] for k = 0..13 + * istride, ostride : element strides, counted in fftw_complex items + * + * Inputs are loaded in pairs whose indices differ by 7 and each pair is + * turned into a difference and a sum. Every load from input is done + * before the first store to output. output[0] is the plain sum of the + * 14 inputs; no 1/14 factor is applied. + * NOTE(review): the transform direction is taken from the function name, + * the -notwiddleinv generator flag and FFTW_BACKWARD in the descriptor; + * the exact sign convention is defined outside this file -- confirm. + */ void fftwi_no_twiddle_14(const fftw_complex *input, fftw_complex *output, + int istride, int ostride) +{ + fftw_real tmp3; + fftw_real tmp25; + fftw_real tmp84; + fftw_real tmp93; + fftw_real tmp10; + fftw_real tmp77; + fftw_real tmp28; + fftw_real tmp97; + fftw_real tmp42; + fftw_real tmp86; + fftw_real tmp65; + fftw_real tmp92; + fftw_real tmp17; + fftw_real tmp79; + fftw_real tmp31; + fftw_real tmp99; + fftw_real tmp56; + fftw_real tmp81; + fftw_real tmp68; + 
fftw_real tmp94; + fftw_real tmp24; + fftw_real tmp78; + fftw_real tmp34; + fftw_real tmp98; + fftw_real tmp49; + fftw_real tmp85; + fftw_real tmp71; + fftw_real tmp95; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp1; + fftw_real tmp2; + fftw_real tmp82; + fftw_real tmp83; + ASSERT_ALIGNED_DOUBLE; + tmp1 = c_re(input[0]); + tmp2 = c_re(input[7 * istride]); + tmp3 = tmp1 - tmp2; + tmp25 = tmp1 + tmp2; + tmp82 = c_im(input[0]); + tmp83 = c_im(input[7 * istride]); + tmp84 = tmp82 - tmp83; + tmp93 = tmp82 + tmp83; + } + { + fftw_real tmp6; + fftw_real tmp26; + fftw_real tmp9; + fftw_real tmp27; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp4; + fftw_real tmp5; + fftw_real tmp7; + fftw_real tmp8; + ASSERT_ALIGNED_DOUBLE; + tmp4 = c_re(input[2 * istride]); + tmp5 = c_re(input[9 * istride]); + tmp6 = tmp4 - tmp5; + tmp26 = tmp4 + tmp5; + tmp7 = c_re(input[12 * istride]); + tmp8 = c_re(input[5 * istride]); + tmp9 = tmp7 - tmp8; + tmp27 = tmp7 + tmp8; + } + tmp10 = tmp6 + tmp9; + tmp77 = tmp6 - tmp9; + tmp28 = tmp26 + tmp27; + tmp97 = tmp26 - tmp27; + } + { + fftw_real tmp38; + fftw_real tmp63; + fftw_real tmp41; + fftw_real tmp64; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp36; + fftw_real tmp37; + fftw_real tmp39; + fftw_real tmp40; + ASSERT_ALIGNED_DOUBLE; + tmp36 = c_im(input[12 * istride]); + tmp37 = c_im(input[5 * istride]); + tmp38 = tmp36 - tmp37; + tmp63 = tmp36 + tmp37; + tmp39 = c_im(input[2 * istride]); + tmp40 = c_im(input[9 * istride]); + tmp41 = tmp39 - tmp40; + tmp64 = tmp39 + tmp40; + } + tmp42 = tmp38 - tmp41; + tmp86 = tmp38 + tmp41; + tmp65 = tmp63 - tmp64; + tmp92 = tmp63 + tmp64; + } + { + fftw_real tmp13; + fftw_real tmp29; + fftw_real tmp16; + fftw_real tmp30; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp11; + fftw_real tmp12; + fftw_real tmp14; + fftw_real tmp15; + ASSERT_ALIGNED_DOUBLE; + tmp11 = c_re(input[4 * istride]); + tmp12 = c_re(input[11 * istride]); + tmp13 = tmp11 - tmp12; + tmp29 = tmp11 + tmp12; + tmp14 = c_re(input[10 * istride]); + tmp15 = 
c_re(input[3 * istride]); + tmp16 = tmp14 - tmp15; + tmp30 = tmp14 + tmp15; + } + tmp17 = tmp13 + tmp16; + tmp79 = tmp13 - tmp16; + tmp31 = tmp29 + tmp30; + tmp99 = tmp30 - tmp29; + } + { + fftw_real tmp52; + fftw_real tmp67; + fftw_real tmp55; + fftw_real tmp66; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp50; + fftw_real tmp51; + fftw_real tmp53; + fftw_real tmp54; + ASSERT_ALIGNED_DOUBLE; + tmp50 = c_im(input[10 * istride]); + tmp51 = c_im(input[3 * istride]); + tmp52 = tmp50 - tmp51; + tmp67 = tmp50 + tmp51; + tmp53 = c_im(input[4 * istride]); + tmp54 = c_im(input[11 * istride]); + tmp55 = tmp53 - tmp54; + tmp66 = tmp53 + tmp54; + } + tmp56 = tmp52 - tmp55; + tmp81 = tmp52 + tmp55; + tmp68 = tmp66 - tmp67; + tmp94 = tmp67 + tmp66; + } + { + fftw_real tmp20; + fftw_real tmp32; + fftw_real tmp23; + fftw_real tmp33; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp18; + fftw_real tmp19; + fftw_real tmp21; + fftw_real tmp22; + ASSERT_ALIGNED_DOUBLE; + tmp18 = c_re(input[6 * istride]); + tmp19 = c_re(input[13 * istride]); + tmp20 = tmp18 - tmp19; + tmp32 = tmp18 + tmp19; + tmp21 = c_re(input[8 * istride]); + tmp22 = c_re(input[istride]); + tmp23 = tmp21 - tmp22; + tmp33 = tmp21 + tmp22; + } + tmp24 = tmp20 + tmp23; + tmp78 = tmp20 - tmp23; + tmp34 = tmp32 + tmp33; + tmp98 = tmp33 - tmp32; + } + { + fftw_real tmp45; + fftw_real tmp70; + fftw_real tmp48; + fftw_real tmp69; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp43; + fftw_real tmp44; + fftw_real tmp46; + fftw_real tmp47; + ASSERT_ALIGNED_DOUBLE; + tmp43 = c_im(input[8 * istride]); + tmp44 = c_im(input[istride]); + tmp45 = tmp43 - tmp44; + tmp70 = tmp43 + tmp44; + tmp46 = c_im(input[6 * istride]); + tmp47 = c_im(input[13 * istride]); + tmp48 = tmp46 - tmp47; + tmp69 = tmp46 + tmp47; + } + tmp49 = tmp45 - tmp48; + tmp85 = tmp45 + tmp48; + tmp71 = tmp69 - tmp70; + tmp95 = tmp70 + tmp69; + } + { /* real parts of all 14 outputs */ + fftw_real tmp57; + fftw_real tmp35; + fftw_real tmp72; + fftw_real tmp62; + ASSERT_ALIGNED_DOUBLE; + c_re(output[7 * ostride]) 
= tmp3 + tmp10 + tmp17 + tmp24; + tmp57 = + (K433883739 * tmp42) + (K974927912 * tmp49) - + (K781831482 * tmp56); + tmp35 = + tmp3 + (K623489801 * tmp17) - (K222520933 * tmp24) - + (K900968867 * tmp10); + c_re(output[11 * ostride]) = tmp35 - tmp57; + c_re(output[3 * ostride]) = tmp35 + tmp57; + { + fftw_real tmp59; + fftw_real tmp58; + fftw_real tmp61; + fftw_real tmp60; + ASSERT_ALIGNED_DOUBLE; + tmp59 = + (K974927912 * tmp42) - (K433883739 * tmp56) - + (K781831482 * tmp49); + tmp58 = + tmp3 + (K623489801 * tmp24) - (K900968867 * tmp17) - + (K222520933 * tmp10); + c_re(output[5 * ostride]) = tmp58 - tmp59; + c_re(output[9 * ostride]) = tmp58 + tmp59; + tmp61 = + (K781831482 * tmp42) + (K433883739 * tmp49) + + (K974927912 * tmp56); + tmp60 = + tmp3 + (K623489801 * tmp10) - (K900968867 * tmp24) - + (K222520933 * tmp17); + c_re(output[13 * ostride]) = tmp60 - tmp61; + c_re(output[ostride]) = tmp60 + tmp61; + } + c_re(output[0]) = tmp25 + tmp28 + tmp31 + tmp34; /* sum of the 14 real parts */ + tmp72 = + (K781831482 * tmp65) - (K974927912 * tmp68) - + (K433883739 * tmp71); + tmp62 = + tmp25 + (K623489801 * tmp28) - (K900968867 * tmp34) - + (K222520933 * tmp31); + c_re(output[6 * ostride]) = tmp62 - tmp72; + c_re(output[8 * ostride]) = tmp62 + tmp72; + { + fftw_real tmp74; + fftw_real tmp73; + fftw_real tmp76; + fftw_real tmp75; + ASSERT_ALIGNED_DOUBLE; + tmp74 = + (K433883739 * tmp65) + (K781831482 * tmp68) - + (K974927912 * tmp71); + tmp73 = + tmp25 + (K623489801 * tmp31) - (K222520933 * tmp34) - + (K900968867 * tmp28); + c_re(output[4 * ostride]) = tmp73 - tmp74; + c_re(output[10 * ostride]) = tmp73 + tmp74; + tmp76 = + (K974927912 * tmp65) + (K781831482 * tmp71) + + (K433883739 * tmp68); + tmp75 = + tmp25 + (K623489801 * tmp34) - (K900968867 * tmp31) - + (K222520933 * tmp28); + c_re(output[12 * ostride]) = tmp75 - tmp76; + c_re(output[2 * ostride]) = tmp75 + tmp76; + } + } + { /* imaginary parts of all 14 outputs */ + fftw_real tmp91; + fftw_real tmp90; + fftw_real tmp103; + fftw_real tmp104; + ASSERT_ALIGNED_DOUBLE; + c_im(output[7 * 
ostride]) = tmp86 + tmp85 + tmp81 + tmp84; + tmp91 = + (K974927912 * tmp77) - (K781831482 * tmp78) - + (K433883739 * tmp79); + tmp90 = + (K623489801 * tmp85) + tmp84 - (K900968867 * tmp81) - + (K222520933 * tmp86); + c_im(output[5 * ostride]) = tmp90 - tmp91; + c_im(output[9 * ostride]) = tmp91 + tmp90; + { + fftw_real tmp88; + fftw_real tmp89; + fftw_real tmp80; + fftw_real tmp87; + ASSERT_ALIGNED_DOUBLE; + tmp88 = + (K781831482 * tmp77) + (K974927912 * tmp79) + + (K433883739 * tmp78); + tmp89 = + (K623489801 * tmp86) + tmp84 - (K222520933 * tmp81) - + (K900968867 * tmp85); + c_im(output[ostride]) = tmp88 + tmp89; + c_im(output[13 * ostride]) = tmp89 - tmp88; + tmp80 = + (K433883739 * tmp77) + (K974927912 * tmp78) - + (K781831482 * tmp79); + tmp87 = + (K623489801 * tmp81) + tmp84 - (K222520933 * tmp85) - + (K900968867 * tmp86); + c_im(output[3 * ostride]) = tmp80 + tmp87; + c_im(output[11 * ostride]) = tmp87 - tmp80; + } + c_im(output[0]) = tmp92 + tmp95 + tmp94 + tmp93; /* sum of the 14 imaginary parts */ + tmp103 = + (K974927912 * tmp97) + (K433883739 * tmp99) + + (K781831482 * tmp98); + tmp104 = + (K623489801 * tmp95) + tmp93 - (K900968867 * tmp94) - + (K222520933 * tmp92); + c_im(output[2 * ostride]) = tmp103 + tmp104; + c_im(output[12 * ostride]) = tmp104 - tmp103; + { + fftw_real tmp100; + fftw_real tmp96; + fftw_real tmp102; + fftw_real tmp101; + ASSERT_ALIGNED_DOUBLE; + tmp100 = + (K781831482 * tmp97) - (K433883739 * tmp98) - + (K974927912 * tmp99); + tmp96 = + (K623489801 * tmp92) + tmp93 - (K222520933 * tmp94) - + (K900968867 * tmp95); + c_im(output[6 * ostride]) = tmp96 - tmp100; + c_im(output[8 * ostride]) = tmp100 + tmp96; + tmp102 = + (K433883739 * tmp97) + (K781831482 * tmp99) - + (K974927912 * tmp98); + tmp101 = + (K623489801 * tmp94) + tmp93 - (K222520933 * tmp95) - + (K900968867 * tmp92); + c_im(output[4 * ostride]) = tmp101 - tmp102; + c_im(output[10 * ostride]) = tmp102 + tmp101; + } + } +} + +/* + * Descriptor record for fftwi_no_twiddle_14: name string, function pointer + * (cast to a generic function pointer), 14, FFTW_BACKWARD, FFTW_NOTW, 320, + * a zero and a null int pointer. + * NOTE(review): field meanings come from fftw_codelet_desc, declared + * outside this file; 14 is presumably the size and 320 an identifying + * signature -- confirm against the struct declaration. + */ fftw_codelet_desc fftwi_no_twiddle_14_desc = { + "fftwi_no_twiddle_14", + (void (*)()) 
fftwi_no_twiddle_14, + 14, + FFTW_BACKWARD, + FFTW_NOTW, + 320, + 0, + (const int *) 0, +}; diff --git a/src/fftw/fni_15.c b/src/fftw/fni_15.c new file mode 100644 index 0000000..1f2c342 --- /dev/null +++ b/src/fftw/fni_15.c @@ -0,0 +1,452 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Mon Mar 24 02:06:46 EST 2003 */ + +#include "fftw-int.h" +#include "fftw.h" + +/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddleinv 15 */ + +/* + * This function contains 156 FP additions, 56 FP multiplications, + * (or, 128 additions, 28 multiplications, 28 fused multiply/add), + * 62 stack variables, and 60 memory accesses + */ +static const fftw_real K951056516 = +FFTW_KONST(+0.951056516295153572116439333379382143405698634); +static const fftw_real K587785252 = +FFTW_KONST(+0.587785252292473129168705954639072768597652438); +static const fftw_real K250000000 = +FFTW_KONST(+0.250000000000000000000000000000000000000000000); +static const fftw_real K559016994 = +FFTW_KONST(+0.559016994374947424102293417182819058860154590); +static const fftw_real K500000000 = 
+FFTW_KONST(+0.500000000000000000000000000000000000000000000); /* one half */+static const fftw_real K866025403 = +FFTW_KONST(+0.866025403784438646763723170752936183471402627); /* ~= sqrt(3)/2 */+ +/* + * Generator Id's : + * $Id: fni_15.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fni_15.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fni_15.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + */ + /* fftwi_no_twiddle_15: straight-line 15-point codelet. It reads input[k * istride] for k = 0..14 in five groups of three, combines each group using K500000000 and K866025403, then builds the results from five-term combinations using K250000000, K559016994, K587785252 and K951056516. Every output[k * ostride], k = 0..14, has its c_re and c_im parts written exactly once. Registered below with FFTW_BACKWARD and FFTW_NOTW. */+void fftwi_no_twiddle_15(const fftw_complex *input, fftw_complex *output, + int istride, int ostride) +{ + fftw_real tmp5; + fftw_real tmp121; + fftw_real tmp148; + fftw_real tmp87; + fftw_real tmp35; + fftw_real tmp67; + fftw_real tmp21; + fftw_real tmp26; + fftw_real tmp27; + fftw_real tmp111; + fftw_real tmp114; + fftw_real tmp123; + fftw_real tmp139; + fftw_real tmp140; + fftw_real tmp146; + fftw_real tmp81; + fftw_real tmp82; + fftw_real tmp89; + fftw_real tmp71; + fftw_real tmp72; + fftw_real tmp73; + fftw_real tmp57; + fftw_real tmp64; + fftw_real tmp65; + fftw_real tmp10; + fftw_real tmp15; + fftw_real tmp16; + fftw_real tmp104; + fftw_real tmp107; + fftw_real tmp122; + fftw_real tmp136; + fftw_real tmp137; + fftw_real tmp145; + fftw_real tmp78; + fftw_real tmp79; + fftw_real tmp88; + fftw_real tmp68; + fftw_real tmp69; + fftw_real tmp70; + fftw_real tmp42; + fftw_real tmp49; + fftw_real tmp50; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp1; + fftw_real tmp30; + fftw_real tmp4; + fftw_real tmp29; + fftw_real tmp33; + fftw_real tmp120; + fftw_real tmp119; + fftw_real tmp34; + ASSERT_ALIGNED_DOUBLE; + tmp1 = c_re(input[0]); + tmp30 = c_im(input[0]); /* first group of three: inputs 0, 5 and 10 */+ { + fftw_real tmp2; + fftw_real tmp3; + fftw_real tmp31; + fftw_real tmp32; + ASSERT_ALIGNED_DOUBLE; + tmp2 = c_re(input[5 * istride]); + tmp3 = c_re(input[10 * istride]); + tmp4 = tmp2 + tmp3; + tmp29 = K866025403 * (tmp2 - tmp3); + tmp31 = c_im(input[5 * istride]); + tmp32 = c_im(input[10 * istride]); + tmp33 = tmp31 + tmp32; + tmp120 = K866025403 * (tmp32 - tmp31); + } + tmp5 = tmp1 + tmp4; + tmp119 = tmp1 - (K500000000 * tmp4); + tmp121 = tmp119 - 
tmp120; + tmp148 = tmp119 + tmp120; + tmp87 = tmp30 + tmp33; + tmp34 = tmp30 - (K500000000 * tmp33); + tmp35 = tmp29 + tmp34; + tmp67 = tmp34 - tmp29; + } + { + fftw_real tmp17; + fftw_real tmp20; + fftw_real tmp51; + fftw_real tmp109; + fftw_real tmp52; + fftw_real tmp55; + fftw_real tmp56; + fftw_real tmp110; + fftw_real tmp22; + fftw_real tmp25; + fftw_real tmp58; + fftw_real tmp112; + fftw_real tmp59; + fftw_real tmp62; + fftw_real tmp63; + fftw_real tmp113; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp18; + fftw_real tmp19; + fftw_real tmp53; + fftw_real tmp54; + ASSERT_ALIGNED_DOUBLE; + tmp17 = c_re(input[6 * istride]); /* second group of three: inputs 6, 11 and 1 */+ tmp18 = c_re(input[11 * istride]); + tmp19 = c_re(input[istride]); + tmp20 = tmp18 + tmp19; + tmp51 = K866025403 * (tmp18 - tmp19); + tmp109 = tmp17 - (K500000000 * tmp20); + tmp52 = c_im(input[6 * istride]); + tmp53 = c_im(input[11 * istride]); + tmp54 = c_im(input[istride]); + tmp55 = tmp53 + tmp54; + tmp56 = tmp52 - (K500000000 * tmp55); + tmp110 = K866025403 * (tmp54 - tmp53); + } + { + fftw_real tmp23; + fftw_real tmp24; + fftw_real tmp60; + fftw_real tmp61; + ASSERT_ALIGNED_DOUBLE; + tmp22 = c_re(input[9 * istride]); /* third group of three: inputs 9, 14 and 4 */+ tmp23 = c_re(input[14 * istride]); + tmp24 = c_re(input[4 * istride]); + tmp25 = tmp23 + tmp24; + tmp58 = K866025403 * (tmp23 - tmp24); + tmp112 = tmp22 - (K500000000 * tmp25); + tmp59 = c_im(input[9 * istride]); + tmp60 = c_im(input[14 * istride]); + tmp61 = c_im(input[4 * istride]); + tmp62 = tmp60 + tmp61; + tmp63 = tmp59 - (K500000000 * tmp62); + tmp113 = K866025403 * (tmp61 - tmp60); + } + tmp21 = tmp17 + tmp20; + tmp26 = tmp22 + tmp25; + tmp27 = tmp21 + tmp26; + tmp111 = tmp109 - tmp110; + tmp114 = tmp112 - tmp113; + tmp123 = tmp111 + tmp114; + tmp139 = tmp109 + tmp110; + tmp140 = tmp112 + tmp113; + tmp146 = tmp139 + tmp140; + tmp81 = tmp52 + tmp55; + tmp82 = tmp59 + tmp62; + tmp89 = tmp81 + tmp82; + tmp71 = tmp56 - tmp51; + tmp72 = tmp63 - tmp58; + tmp73 = tmp71 + tmp72; + tmp57 = tmp51 + tmp56; + tmp64 = tmp58 + 
tmp63; + tmp65 = tmp57 + tmp64; + } + { + fftw_real tmp6; + fftw_real tmp9; + fftw_real tmp36; + fftw_real tmp102; + fftw_real tmp37; + fftw_real tmp40; + fftw_real tmp41; + fftw_real tmp103; + fftw_real tmp11; + fftw_real tmp14; + fftw_real tmp43; + fftw_real tmp105; + fftw_real tmp44; + fftw_real tmp47; + fftw_real tmp48; + fftw_real tmp106; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp7; + fftw_real tmp8; + fftw_real tmp38; + fftw_real tmp39; + ASSERT_ALIGNED_DOUBLE; + tmp6 = c_re(input[3 * istride]); /* fourth group of three: inputs 3, 8 and 13 */+ tmp7 = c_re(input[8 * istride]); + tmp8 = c_re(input[13 * istride]); + tmp9 = tmp7 + tmp8; + tmp36 = K866025403 * (tmp7 - tmp8); + tmp102 = tmp6 - (K500000000 * tmp9); + tmp37 = c_im(input[3 * istride]); + tmp38 = c_im(input[8 * istride]); + tmp39 = c_im(input[13 * istride]); + tmp40 = tmp38 + tmp39; + tmp41 = tmp37 - (K500000000 * tmp40); + tmp103 = K866025403 * (tmp39 - tmp38); + } + { + fftw_real tmp12; + fftw_real tmp13; + fftw_real tmp45; + fftw_real tmp46; + ASSERT_ALIGNED_DOUBLE; + tmp11 = c_re(input[12 * istride]); /* fifth group of three: inputs 12, 2 and 7 */+ tmp12 = c_re(input[2 * istride]); + tmp13 = c_re(input[7 * istride]); + tmp14 = tmp12 + tmp13; + tmp43 = K866025403 * (tmp12 - tmp13); + tmp105 = tmp11 - (K500000000 * tmp14); + tmp44 = c_im(input[12 * istride]); + tmp45 = c_im(input[2 * istride]); + tmp46 = c_im(input[7 * istride]); + tmp47 = tmp45 + tmp46; + tmp48 = tmp44 - (K500000000 * tmp47); + tmp106 = K866025403 * (tmp46 - tmp45); + } + tmp10 = tmp6 + tmp9; + tmp15 = tmp11 + tmp14; + tmp16 = tmp10 + tmp15; + tmp104 = tmp102 - tmp103; + tmp107 = tmp105 - tmp106; + tmp122 = tmp104 + tmp107; + tmp136 = tmp102 + tmp103; + tmp137 = tmp105 + tmp106; + tmp145 = tmp136 + tmp137; + tmp78 = tmp37 + tmp40; + tmp79 = tmp44 + tmp47; + tmp88 = tmp78 + tmp79; + tmp68 = tmp41 - tmp36; + tmp69 = tmp48 - tmp43; + tmp70 = tmp68 + tmp69; + tmp42 = tmp36 + tmp41; + tmp49 = tmp43 + tmp48; + tmp50 = tmp42 + tmp49; + } + { + fftw_real tmp76; + fftw_real tmp28; + fftw_real tmp75; + fftw_real tmp84; + fftw_real 
tmp86; + fftw_real tmp80; + fftw_real tmp83; + fftw_real tmp85; + fftw_real tmp77; + ASSERT_ALIGNED_DOUBLE; + tmp76 = K559016994 * (tmp16 - tmp27); + tmp28 = tmp16 + tmp27; + tmp75 = tmp5 - (K250000000 * tmp28); + tmp80 = tmp78 - tmp79; + tmp83 = tmp81 - tmp82; + tmp84 = (K587785252 * tmp80) - (K951056516 * tmp83); + tmp86 = (K951056516 * tmp80) + (K587785252 * tmp83); + c_re(output[0]) = tmp5 + tmp28; /* this block writes c_re of outputs 0, 6, 9, 12 and 3 */+ tmp85 = tmp76 + tmp75; + c_re(output[6 * ostride]) = tmp85 - tmp86; + c_re(output[9 * ostride]) = tmp85 + tmp86; + tmp77 = tmp75 - tmp76; + c_re(output[12 * ostride]) = tmp77 - tmp84; + c_re(output[3 * ostride]) = tmp77 + tmp84; + } + { + fftw_real tmp134; + fftw_real tmp66; + fftw_real tmp133; + fftw_real tmp142; + fftw_real tmp144; + fftw_real tmp138; + fftw_real tmp141; + fftw_real tmp143; + fftw_real tmp135; + ASSERT_ALIGNED_DOUBLE; + tmp134 = K559016994 * (tmp50 - tmp65); + tmp66 = tmp50 + tmp65; + tmp133 = tmp35 - (K250000000 * tmp66); + tmp138 = tmp136 - tmp137; + tmp141 = tmp139 - tmp140; + tmp142 = (K587785252 * tmp138) - (K951056516 * tmp141); + tmp144 = (K951056516 * tmp138) + (K587785252 * tmp141); + c_im(output[10 * ostride]) = tmp35 + tmp66; /* this block writes c_im of outputs 10, 4, 1, 13 and 7 */+ tmp143 = tmp134 + tmp133; + c_im(output[4 * ostride]) = tmp143 - tmp144; + c_im(output[ostride]) = tmp143 + tmp144; + tmp135 = tmp133 - tmp134; + c_im(output[13 * ostride]) = tmp135 - tmp142; + c_im(output[7 * ostride]) = tmp135 + tmp142; + } + { + fftw_real tmp147; + fftw_real tmp149; + fftw_real tmp150; + fftw_real tmp154; + fftw_real tmp156; + fftw_real tmp152; + fftw_real tmp153; + fftw_real tmp155; + fftw_real tmp151; + ASSERT_ALIGNED_DOUBLE; + tmp147 = K559016994 * (tmp145 - tmp146); + tmp149 = tmp145 + tmp146; + tmp150 = tmp148 - (K250000000 * tmp149); + tmp152 = tmp42 - tmp49; + tmp153 = tmp57 - tmp64; + tmp154 = (K951056516 * tmp152) + (K587785252 * tmp153); + tmp156 = (K587785252 * tmp152) - (K951056516 * tmp153); + c_re(output[10 * ostride]) = tmp148 + tmp149; /* this block writes c_re of outputs 10, 7, 13, 1 and 4 */+ tmp155 = tmp150 - tmp147; + 
c_re(output[7 * ostride]) = tmp155 - tmp156; + c_re(output[13 * ostride]) = tmp156 + tmp155; + tmp151 = tmp147 + tmp150; + c_re(output[ostride]) = tmp151 - tmp154; + c_re(output[4 * ostride]) = tmp154 + tmp151; + } + { + fftw_real tmp126; + fftw_real tmp124; + fftw_real tmp125; + fftw_real tmp130; + fftw_real tmp132; + fftw_real tmp128; + fftw_real tmp129; + fftw_real tmp131; + fftw_real tmp127; + ASSERT_ALIGNED_DOUBLE; + tmp126 = K559016994 * (tmp122 - tmp123); + tmp124 = tmp122 + tmp123; + tmp125 = tmp121 - (K250000000 * tmp124); + tmp128 = tmp68 - tmp69; + tmp129 = tmp71 - tmp72; + tmp130 = (K587785252 * tmp128) - (K951056516 * tmp129); + tmp132 = (K951056516 * tmp128) + (K587785252 * tmp129); + c_re(output[5 * ostride]) = tmp121 + tmp124; /* this block writes c_re of outputs 5, 11, 14, 2 and 8 */+ tmp131 = tmp126 + tmp125; + c_re(output[11 * ostride]) = tmp131 - tmp132; + c_re(output[14 * ostride]) = tmp132 + tmp131; + tmp127 = tmp125 - tmp126; + c_re(output[2 * ostride]) = tmp127 - tmp130; + c_re(output[8 * ostride]) = tmp130 + tmp127; + } + { + fftw_real tmp92; + fftw_real tmp90; + fftw_real tmp91; + fftw_real tmp96; + fftw_real tmp97; + fftw_real tmp94; + fftw_real tmp95; + fftw_real tmp98; + fftw_real tmp93; + ASSERT_ALIGNED_DOUBLE; + tmp92 = K559016994 * (tmp88 - tmp89); + tmp90 = tmp88 + tmp89; + tmp91 = tmp87 - (K250000000 * tmp90); + tmp94 = tmp10 - tmp15; + tmp95 = tmp21 - tmp26; + tmp96 = (K587785252 * tmp94) - (K951056516 * tmp95); + tmp97 = (K951056516 * tmp94) + (K587785252 * tmp95); + c_im(output[0]) = tmp87 + tmp90; /* this block writes c_im of outputs 0, 6, 9, 3 and 12 */+ tmp98 = tmp92 + tmp91; + c_im(output[6 * ostride]) = tmp97 + tmp98; + c_im(output[9 * ostride]) = tmp98 - tmp97; + tmp93 = tmp91 - tmp92; + c_im(output[3 * ostride]) = tmp93 - tmp96; + c_im(output[12 * ostride]) = tmp96 + tmp93; + } + { + fftw_real tmp100; + fftw_real tmp74; + fftw_real tmp99; + fftw_real tmp116; + fftw_real tmp118; + fftw_real tmp108; + fftw_real tmp115; + fftw_real tmp117; + fftw_real tmp101; + ASSERT_ALIGNED_DOUBLE; + tmp100 = K559016994 * (tmp70 - tmp73); + tmp74 = 
tmp70 + tmp73; + tmp99 = tmp67 - (K250000000 * tmp74); + tmp108 = tmp104 - tmp107; + tmp115 = tmp111 - tmp114; + tmp116 = (K587785252 * tmp108) - (K951056516 * tmp115); + tmp118 = (K951056516 * tmp108) + (K587785252 * tmp115); + c_im(output[5 * ostride]) = tmp67 + tmp74; /* this block writes c_im of outputs 5, 14, 11, 8 and 2 */+ tmp117 = tmp100 + tmp99; + c_im(output[14 * ostride]) = tmp117 - tmp118; + c_im(output[11 * ostride]) = tmp117 + tmp118; + tmp101 = tmp99 - tmp100; + c_im(output[8 * ostride]) = tmp101 - tmp116; + c_im(output[2 * ostride]) = tmp101 + tmp116; + } +} + /* Descriptor for the codelet above: its name string, the function cast to a generic function pointer, then 15, FFTW_BACKWARD, FFTW_NOTW, 342, 0 and a null const int pointer. NOTE(review): the field meanings are fixed by fftw_codelet_desc, which is declared outside this view (presumably in fftw-int.h) -- confirm there. */+fftw_codelet_desc fftwi_no_twiddle_15_desc = { + "fftwi_no_twiddle_15", + (void (*)()) fftwi_no_twiddle_15, + 15, + FFTW_BACKWARD, + FFTW_NOTW, + 342, + 0, + (const int *) 0, +}; diff --git a/src/fftw/fni_16.c b/src/fftw/fni_16.c new file mode 100644 index 0000000..bb2e49a --- /dev/null +++ b/src/fftw/fni_16.c @@ -0,0 +1,442 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Mon Mar 24 02:06:47 EST 2003 */ + +#include "fftw-int.h" +#include "fftw.h" + +/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddleinv 16 */ + +/* + * This function contains 144 FP additions, 24 FP multiplications, + * (or, 136 additions, 16 multiplications, 8 fused multiply/add), + * 46 stack variables, and 64 memory accesses + */ +static const fftw_real K382683432 = +FFTW_KONST(+0.382683432365089771728459984030398866761344562); /* ~= sin(pi/8) */+static const fftw_real K923879532 = +FFTW_KONST(+0.923879532511286756128183189396788286822416626); /* ~= cos(pi/8) */+static const fftw_real K707106781 = +FFTW_KONST(+0.707106781186547524400844362104849039284835938); /* ~= sqrt(2)/2 */+ +/* + * Generator Id's : + * $Id: fni_16.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fni_16.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fni_16.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + */ + /* fftwi_no_twiddle_16: straight-line 16-point codelet. It reads input[k * istride] for k = 0..15 as eight pairs whose indices differ by 8 (0/8, 4/12, 15/7, 3/11, 2/10, 14/6, 1/9, 5/13), forms the sum and difference of each pair, and combines them using K707106781, K923879532 and K382683432. Every output[k * ostride], k = 0..15, has its c_re and c_im parts written exactly once. Registered below with FFTW_BACKWARD and FFTW_NOTW. */+void fftwi_no_twiddle_16(const fftw_complex *input, fftw_complex *output, + int istride, int ostride) +{ + fftw_real tmp7; + fftw_real tmp129; + fftw_real tmp38; + fftw_real tmp115; + fftw_real tmp49; + fftw_real tmp95; + fftw_real tmp83; + fftw_real tmp105; + fftw_real tmp29; + fftw_real tmp123; + fftw_real tmp73; + fftw_real tmp101; + fftw_real tmp78; + fftw_real tmp102; + fftw_real tmp126; + fftw_real tmp141; + fftw_real tmp14; + fftw_real tmp116; + fftw_real tmp45; + fftw_real tmp130; + fftw_real tmp52; + fftw_real tmp84; + fftw_real tmp55; + fftw_real tmp85; + fftw_real tmp22; + fftw_real tmp118; + fftw_real tmp62; + fftw_real tmp98; + fftw_real tmp67; + fftw_real tmp99; + fftw_real tmp121; + fftw_real tmp140; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real 
tmp3; + fftw_real tmp81; + fftw_real tmp34; + fftw_real tmp48; + fftw_real tmp6; + fftw_real tmp47; + fftw_real tmp37; + fftw_real tmp82; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp1; + fftw_real tmp2; + fftw_real tmp32; + fftw_real tmp33; + ASSERT_ALIGNED_DOUBLE; + tmp1 = c_re(input[0]); + tmp2 = c_re(input[8 * istride]); /* pair: inputs 0 and 8 */+ tmp3 = tmp1 + tmp2; + tmp81 = tmp1 - tmp2; + tmp32 = c_im(input[0]); + tmp33 = c_im(input[8 * istride]); + tmp34 = tmp32 + tmp33; + tmp48 = tmp32 - tmp33; + } + { + fftw_real tmp4; + fftw_real tmp5; + fftw_real tmp35; + fftw_real tmp36; + ASSERT_ALIGNED_DOUBLE; + tmp4 = c_re(input[4 * istride]); + tmp5 = c_re(input[12 * istride]); /* pair: inputs 4 and 12 */+ tmp6 = tmp4 + tmp5; + tmp47 = tmp4 - tmp5; + tmp35 = c_im(input[4 * istride]); + tmp36 = c_im(input[12 * istride]); + tmp37 = tmp35 + tmp36; + tmp82 = tmp35 - tmp36; + } + tmp7 = tmp3 + tmp6; + tmp129 = tmp3 - tmp6; + tmp38 = tmp34 + tmp37; + tmp115 = tmp34 - tmp37; + tmp49 = tmp47 + tmp48; + tmp95 = tmp48 - tmp47; + tmp83 = tmp81 - tmp82; + tmp105 = tmp81 + tmp82; + } + { + fftw_real tmp25; + fftw_real tmp74; + fftw_real tmp72; + fftw_real tmp124; + fftw_real tmp28; + fftw_real tmp69; + fftw_real tmp77; + fftw_real tmp125; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp23; + fftw_real tmp24; + fftw_real tmp70; + fftw_real tmp71; + ASSERT_ALIGNED_DOUBLE; + tmp23 = c_re(input[15 * istride]); + tmp24 = c_re(input[7 * istride]); /* pair: inputs 15 and 7 */+ tmp25 = tmp23 + tmp24; + tmp74 = tmp23 - tmp24; + tmp70 = c_im(input[15 * istride]); + tmp71 = c_im(input[7 * istride]); + tmp72 = tmp70 - tmp71; + tmp124 = tmp70 + tmp71; + } + { + fftw_real tmp26; + fftw_real tmp27; + fftw_real tmp75; + fftw_real tmp76; + ASSERT_ALIGNED_DOUBLE; + tmp26 = c_re(input[3 * istride]); + tmp27 = c_re(input[11 * istride]); /* pair: inputs 3 and 11 */+ tmp28 = tmp26 + tmp27; + tmp69 = tmp26 - tmp27; + tmp75 = c_im(input[3 * istride]); + tmp76 = c_im(input[11 * istride]); + tmp77 = tmp75 - tmp76; + tmp125 = tmp75 + tmp76; + } + tmp29 = tmp25 + tmp28; + tmp123 = tmp25 - tmp28; + tmp73 = tmp69 + 
tmp72; + tmp101 = tmp72 - tmp69; + tmp78 = tmp74 - tmp77; + tmp102 = tmp74 + tmp77; + tmp126 = tmp124 - tmp125; + tmp141 = tmp124 + tmp125; + } + { + fftw_real tmp10; + fftw_real tmp50; + fftw_real tmp41; + fftw_real tmp51; + fftw_real tmp13; + fftw_real tmp54; + fftw_real tmp44; + fftw_real tmp53; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp8; + fftw_real tmp9; + fftw_real tmp39; + fftw_real tmp40; + ASSERT_ALIGNED_DOUBLE; + tmp8 = c_re(input[2 * istride]); + tmp9 = c_re(input[10 * istride]); /* pair: inputs 2 and 10 */+ tmp10 = tmp8 + tmp9; + tmp50 = tmp8 - tmp9; + tmp39 = c_im(input[2 * istride]); + tmp40 = c_im(input[10 * istride]); + tmp41 = tmp39 + tmp40; + tmp51 = tmp39 - tmp40; + } + { + fftw_real tmp11; + fftw_real tmp12; + fftw_real tmp42; + fftw_real tmp43; + ASSERT_ALIGNED_DOUBLE; + tmp11 = c_re(input[14 * istride]); + tmp12 = c_re(input[6 * istride]); /* pair: inputs 14 and 6 */+ tmp13 = tmp11 + tmp12; + tmp54 = tmp11 - tmp12; + tmp42 = c_im(input[14 * istride]); + tmp43 = c_im(input[6 * istride]); + tmp44 = tmp42 + tmp43; + tmp53 = tmp42 - tmp43; + } + tmp14 = tmp10 + tmp13; + tmp116 = tmp10 - tmp13; + tmp45 = tmp41 + tmp44; + tmp130 = tmp44 - tmp41; + tmp52 = tmp50 + tmp51; + tmp84 = tmp50 - tmp51; + tmp55 = tmp53 - tmp54; + tmp85 = tmp54 + tmp53; + } + { + fftw_real tmp18; + fftw_real tmp63; + fftw_real tmp61; + fftw_real tmp119; + fftw_real tmp21; + fftw_real tmp58; + fftw_real tmp66; + fftw_real tmp120; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp16; + fftw_real tmp17; + fftw_real tmp59; + fftw_real tmp60; + ASSERT_ALIGNED_DOUBLE; + tmp16 = c_re(input[istride]); + tmp17 = c_re(input[9 * istride]); /* pair: inputs 1 and 9 */+ tmp18 = tmp16 + tmp17; + tmp63 = tmp16 - tmp17; + tmp59 = c_im(input[istride]); + tmp60 = c_im(input[9 * istride]); + tmp61 = tmp59 - tmp60; + tmp119 = tmp59 + tmp60; + } + { + fftw_real tmp19; + fftw_real tmp20; + fftw_real tmp64; + fftw_real tmp65; + ASSERT_ALIGNED_DOUBLE; + tmp19 = c_re(input[5 * istride]); + tmp20 = c_re(input[13 * istride]); /* pair: inputs 5 and 13 */+ tmp21 = tmp19 + tmp20; + tmp58 = tmp19 - tmp20; + tmp64 = 
c_im(input[5 * istride]); + tmp65 = c_im(input[13 * istride]); + tmp66 = tmp64 - tmp65; + tmp120 = tmp64 + tmp65; + } + tmp22 = tmp18 + tmp21; + tmp118 = tmp18 - tmp21; + tmp62 = tmp58 + tmp61; + tmp98 = tmp61 - tmp58; + tmp67 = tmp63 - tmp66; + tmp99 = tmp63 + tmp66; + tmp121 = tmp119 - tmp120; + tmp140 = tmp119 + tmp120; + } + { + fftw_real tmp15; + fftw_real tmp30; + fftw_real tmp31; + fftw_real tmp46; + ASSERT_ALIGNED_DOUBLE; + tmp15 = tmp7 + tmp14; + tmp30 = tmp22 + tmp29; + c_re(output[8 * ostride]) = tmp15 - tmp30; + c_re(output[0]) = tmp15 + tmp30; + tmp31 = tmp22 - tmp29; + tmp46 = tmp38 - tmp45; + c_im(output[4 * ostride]) = tmp31 + tmp46; + c_im(output[12 * ostride]) = tmp46 - tmp31; /* this block: c_re of outputs 8 and 0, c_im of outputs 4 and 12 */+ } + { + fftw_real tmp139; + fftw_real tmp142; + fftw_real tmp143; + fftw_real tmp144; + ASSERT_ALIGNED_DOUBLE; + tmp139 = tmp38 + tmp45; + tmp142 = tmp140 + tmp141; + c_im(output[8 * ostride]) = tmp139 - tmp142; + c_im(output[0]) = tmp139 + tmp142; + tmp143 = tmp7 - tmp14; + tmp144 = tmp141 - tmp140; + c_re(output[12 * ostride]) = tmp143 - tmp144; + c_re(output[4 * ostride]) = tmp143 + tmp144; /* this block: c_im of outputs 8 and 0, c_re of outputs 12 and 4 */+ } + { + fftw_real tmp117; + fftw_real tmp131; + fftw_real tmp128; + fftw_real tmp132; + fftw_real tmp122; + fftw_real tmp127; + ASSERT_ALIGNED_DOUBLE; + tmp117 = tmp115 - tmp116; + tmp131 = tmp129 + tmp130; + tmp122 = tmp118 - tmp121; + tmp127 = tmp123 + tmp126; + tmp128 = K707106781 * (tmp122 - tmp127); + tmp132 = K707106781 * (tmp122 + tmp127); + c_im(output[14 * ostride]) = tmp117 - tmp128; + c_im(output[6 * ostride]) = tmp117 + tmp128; + c_re(output[10 * ostride]) = tmp131 - tmp132; + c_re(output[2 * ostride]) = tmp131 + tmp132; /* this block: c_im of outputs 14 and 6, c_re of outputs 10 and 2 */+ } + { + fftw_real tmp133; + fftw_real tmp137; + fftw_real tmp136; + fftw_real tmp138; + fftw_real tmp134; + fftw_real tmp135; + ASSERT_ALIGNED_DOUBLE; + tmp133 = tmp116 + tmp115; + tmp137 = tmp129 - tmp130; + tmp134 = tmp118 + tmp121; + tmp135 = tmp126 - tmp123; + tmp136 = K707106781 * (tmp134 + tmp135); + tmp138 = K707106781 * (tmp135 - tmp134); + 
c_im(output[10 * ostride]) = tmp133 - tmp136; + c_im(output[2 * ostride]) = tmp133 + tmp136; + c_re(output[14 * ostride]) = tmp137 - tmp138; + c_re(output[6 * ostride]) = tmp137 + tmp138; /* this block: c_im of outputs 10 and 2, c_re of outputs 14 and 6 */+ } + { + fftw_real tmp57; + fftw_real tmp89; + fftw_real tmp92; + fftw_real tmp94; + fftw_real tmp87; + fftw_real tmp93; + fftw_real tmp80; + fftw_real tmp88; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp56; + fftw_real tmp90; + fftw_real tmp91; + fftw_real tmp86; + fftw_real tmp68; + fftw_real tmp79; + ASSERT_ALIGNED_DOUBLE; + tmp56 = K707106781 * (tmp52 + tmp55); + tmp57 = tmp49 + tmp56; + tmp89 = tmp49 - tmp56; + tmp90 = (K923879532 * tmp67) - (K382683432 * tmp62); + tmp91 = (K382683432 * tmp73) + (K923879532 * tmp78); + tmp92 = tmp90 - tmp91; + tmp94 = tmp90 + tmp91; + tmp86 = K707106781 * (tmp84 + tmp85); + tmp87 = tmp83 - tmp86; + tmp93 = tmp83 + tmp86; + tmp68 = (K923879532 * tmp62) + (K382683432 * tmp67); + tmp79 = (K923879532 * tmp73) - (K382683432 * tmp78); + tmp80 = tmp68 + tmp79; + tmp88 = tmp79 - tmp68; + } /* the eight stores below cover c_re and c_im of outputs 9, 1, 13 and 5 */+ c_im(output[9 * ostride]) = tmp57 - tmp80; + c_im(output[ostride]) = tmp57 + tmp80; + c_re(output[13 * ostride]) = tmp87 - tmp88; + c_re(output[5 * ostride]) = tmp87 + tmp88; + c_im(output[13 * ostride]) = tmp89 - tmp92; + c_im(output[5 * ostride]) = tmp89 + tmp92; + c_re(output[9 * ostride]) = tmp93 - tmp94; + c_re(output[ostride]) = tmp93 + tmp94; + } + { + fftw_real tmp97; + fftw_real tmp109; + fftw_real tmp112; + fftw_real tmp114; + fftw_real tmp107; + fftw_real tmp113; + fftw_real tmp104; + fftw_real tmp108; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp96; + fftw_real tmp110; + fftw_real tmp111; + fftw_real tmp106; + fftw_real tmp100; + fftw_real tmp103; + ASSERT_ALIGNED_DOUBLE; + tmp96 = K707106781 * (tmp84 - tmp85); + tmp97 = tmp95 + tmp96; + tmp109 = tmp95 - tmp96; + tmp110 = (K382683432 * tmp99) - (K923879532 * tmp98); + tmp111 = (K923879532 * tmp101) + (K382683432 * tmp102); + tmp112 = tmp110 - tmp111; + tmp114 = tmp110 + tmp111; + tmp106 = K707106781 * 
(tmp55 - tmp52); + tmp107 = tmp105 - tmp106; + tmp113 = tmp105 + tmp106; + tmp100 = (K382683432 * tmp98) + (K923879532 * tmp99); + tmp103 = (K382683432 * tmp101) - (K923879532 * tmp102); + tmp104 = tmp100 + tmp103; + tmp108 = tmp103 - tmp100; + } /* the eight stores below cover c_re and c_im of outputs 11, 3, 15 and 7 */+ c_im(output[11 * ostride]) = tmp97 - tmp104; + c_im(output[3 * ostride]) = tmp97 + tmp104; + c_re(output[15 * ostride]) = tmp107 - tmp108; + c_re(output[7 * ostride]) = tmp107 + tmp108; + c_im(output[15 * ostride]) = tmp109 - tmp112; + c_im(output[7 * ostride]) = tmp109 + tmp112; + c_re(output[11 * ostride]) = tmp113 - tmp114; + c_re(output[3 * ostride]) = tmp113 + tmp114; + } +} + /* Descriptor for the codelet above: its name string, the function cast to a generic function pointer, then 16, FFTW_BACKWARD, FFTW_NOTW, 364, 0 and a null const int pointer. NOTE(review): the field meanings are fixed by fftw_codelet_desc, which is declared outside this view (presumably in fftw-int.h) -- confirm there. */+fftw_codelet_desc fftwi_no_twiddle_16_desc = { + "fftwi_no_twiddle_16", + (void (*)()) fftwi_no_twiddle_16, + 16, + FFTW_BACKWARD, + FFTW_NOTW, + 364, + 0, + (const int *) 0, +}; diff --git a/src/fftw/fni_2.c b/src/fftw/fni_2.c new file mode 100644 index 0000000..ca159e5 --- /dev/null +++ b/src/fftw/fni_2.c @@ -0,0 +1,68 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Mon Mar 24 02:06:18 EST 2003 */ + +#include "fftw-int.h" +#include "fftw.h" + +/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddleinv 2 */ + +/* + * This function contains 4 FP additions, 0 FP multiplications, + * (or, 4 additions, 0 multiplications, 0 fused multiply/add), + * 4 stack variables, and 8 memory accesses + */ + +/* + * Generator Id's : + * $Id: fni_2.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fni_2.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fni_2.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + */ + /* fftwi_no_twiddle_2: 2-point codelet. Reads input[0] and input[istride]; stores their sum in output[0] and their difference (element 0 minus element istride) in output[ostride], handling the c_re and c_im parts separately. No multiplications are involved. Registered below with FFTW_BACKWARD and FFTW_NOTW. */+void fftwi_no_twiddle_2(const fftw_complex *input, fftw_complex *output, + int istride, int ostride) +{ + fftw_real tmp1; + fftw_real tmp2; + fftw_real tmp3; + fftw_real tmp4; + ASSERT_ALIGNED_DOUBLE; + tmp1 = c_re(input[0]); + tmp2 = c_re(input[istride]); /* both c_re values are loaded before any store */+ c_re(output[ostride]) = tmp1 - tmp2; + c_re(output[0]) = tmp1 + tmp2; + tmp3 = c_im(input[0]); + tmp4 = c_im(input[istride]); /* same pattern for the c_im parts */+ c_im(output[ostride]) = tmp3 - tmp4; + c_im(output[0]) = tmp3 + tmp4; +} + /* Descriptor for the codelet above: its name string, the function cast to a generic function pointer, then 2, FFTW_BACKWARD, FFTW_NOTW, 56, 0 and a null const int pointer. NOTE(review): the field meanings are fixed by fftw_codelet_desc, which is declared outside this view (presumably in fftw-int.h) -- confirm there. */+fftw_codelet_desc fftwi_no_twiddle_2_desc = { + "fftwi_no_twiddle_2", + (void (*)()) fftwi_no_twiddle_2, + 2, + FFTW_BACKWARD, + FFTW_NOTW, + 56, + 0, + (const int *) 0, +}; diff --git a/src/fftw/fni_3.c b/src/fftw/fni_3.c new file mode 100644 index 0000000..9db0abd --- /dev/null +++ b/src/fftw/fni_3.c @@ -0,0 +1,93 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or 
+ * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Mon Mar 24 02:06:18 EST 2003 */ + +#include "fftw-int.h" +#include "fftw.h" + +/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddleinv 3 */ + +/* + * This function contains 12 FP additions, 4 FP multiplications, + * (or, 10 additions, 2 multiplications, 2 fused multiply/add), + * 12 stack variables, and 12 memory accesses + */ +static const fftw_real K500000000 = +FFTW_KONST(+0.500000000000000000000000000000000000000000000); /* one half */+static const fftw_real K866025403 = +FFTW_KONST(+0.866025403784438646763723170752936183471402627); /* ~= sqrt(3)/2 */+ +/* + * Generator Id's : + * $Id: fni_3.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fni_3.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fni_3.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + */ + /* fftwi_no_twiddle_3: 3-point codelet. Reads input[0], input[istride] and input[2 * istride]. output[0] receives the sum of all three elements. output[ostride] and output[2 * ostride] receive (element 0 minus half the sum of the other two) plus or minus a K866025403-scaled difference of the other two, with c_re and c_im handled separately. Registered below with FFTW_BACKWARD and FFTW_NOTW. */+void fftwi_no_twiddle_3(const fftw_complex *input, fftw_complex *output, + int istride, int ostride) +{ + fftw_real tmp1; + fftw_real tmp6; + fftw_real tmp4; + fftw_real tmp5; + fftw_real tmp9; + fftw_real tmp12; + fftw_real tmp11; + fftw_real tmp10; + ASSERT_ALIGNED_DOUBLE; + tmp1 = c_re(input[0]); + tmp6 = c_im(input[0]); /* element 0 is kept aside; elements 1 and 2 are combined in the inner scope */+ { + fftw_real tmp2; + fftw_real tmp3; + fftw_real tmp7; + fftw_real tmp8; + ASSERT_ALIGNED_DOUBLE; + tmp2 = c_re(input[istride]); + tmp3 = c_re(input[2 * istride]); + tmp4 = tmp2 + tmp3; + tmp5 = K866025403 * (tmp2 - tmp3); /* scaled c_re difference, used later in the c_im outputs */+ tmp7 = c_im(input[istride]); 
+ tmp8 = c_im(input[2 * istride]); + tmp9 = tmp7 + tmp8; + tmp12 = K866025403 * (tmp8 - tmp7); /* scaled c_im difference, used in the c_re outputs */+ } + c_re(output[0]) = tmp1 + tmp4; + tmp11 = tmp1 - (K500000000 * tmp4); + c_re(output[2 * ostride]) = tmp11 - tmp12; + c_re(output[ostride]) = tmp11 + tmp12; + c_im(output[0]) = tmp6 + tmp9; + tmp10 = tmp6 - (K500000000 * tmp9); + c_im(output[ostride]) = tmp5 + tmp10; + c_im(output[2 * ostride]) = tmp10 - tmp5; +} + /* Descriptor for the codelet above: its name string, the function cast to a generic function pointer, then 3, FFTW_BACKWARD, FFTW_NOTW, 78, 0 and a null const int pointer. NOTE(review): the field meanings are fixed by fftw_codelet_desc, which is declared outside this view (presumably in fftw-int.h) -- confirm there. */+fftw_codelet_desc fftwi_no_twiddle_3_desc = { + "fftwi_no_twiddle_3", + (void (*)()) fftwi_no_twiddle_3, + 3, + FFTW_BACKWARD, + FFTW_NOTW, + 78, + 0, + (const int *) 0, +}; diff --git a/src/fftw/fni_32.c b/src/fftw/fni_32.c new file mode 100644 index 0000000..d748fd0 --- /dev/null +++ b/src/fftw/fni_32.c @@ -0,0 +1,1049 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Mon Mar 24 02:06:50 EST 2003 */ + +#include "fftw-int.h" +#include "fftw.h" + +/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddleinv 32 */ + +/* + * This function contains 372 FP additions, 84 FP multiplications, + * (or, 340 additions, 52 multiplications, 32 fused multiply/add), + * 92 stack variables, and 128 memory accesses + */ +static const fftw_real K195090322 = +FFTW_KONST(+0.195090322016128267848284868477022240927691618); +static const fftw_real K980785280 = +FFTW_KONST(+0.980785280403230449126182236134239036973933731); +static const fftw_real K831469612 = +FFTW_KONST(+0.831469612302545237078788377617905756738560812); +static const fftw_real K555570233 = +FFTW_KONST(+0.555570233019602224742830813948532874374937191); +static const fftw_real K382683432 = +FFTW_KONST(+0.382683432365089771728459984030398866761344562); +static const fftw_real K923879532 = +FFTW_KONST(+0.923879532511286756128183189396788286822416626); +static const fftw_real K707106781 = +FFTW_KONST(+0.707106781186547524400844362104849039284835938); + +/* + * Generator Id's : + * $Id: fni_32.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fni_32.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fni_32.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + */ + +void fftwi_no_twiddle_32(const fftw_complex *input, fftw_complex *output, + int istride, int ostride) +{ + fftw_real tmp7; + fftw_real tmp339; + fftw_real tmp70; + fftw_real tmp313; + fftw_real tmp97; + fftw_real tmp215; + fftw_real tmp179; + fftw_real tmp241; + fftw_real tmp14; + fftw_real tmp314; + fftw_real tmp77; + fftw_real tmp340; + fftw_real tmp182; + 
fftw_real tmp216; + fftw_real tmp104; + fftw_real tmp242; + fftw_real tmp153; + fftw_real tmp236; + fftw_real tmp53; + fftw_real tmp60; + fftw_real tmp287; + fftw_real tmp336; + fftw_real tmp360; + fftw_real tmp290; + fftw_real tmp293; + fftw_real tmp294; + fftw_real tmp170; + fftw_real tmp233; + fftw_real tmp333; + fftw_real tmp359; + fftw_real tmp164; + fftw_real tmp234; + fftw_real tmp173; + fftw_real tmp237; + fftw_real tmp22; + fftw_real tmp318; + fftw_real tmp343; + fftw_real tmp85; + fftw_real tmp112; + fftw_real tmp185; + fftw_real tmp220; + fftw_real tmp245; + fftw_real tmp29; + fftw_real tmp321; + fftw_real tmp342; + fftw_real tmp92; + fftw_real tmp119; + fftw_real tmp184; + fftw_real tmp223; + fftw_real tmp244; + fftw_real tmp126; + fftw_real tmp229; + fftw_real tmp38; + fftw_real tmp45; + fftw_real tmp278; + fftw_real tmp329; + fftw_real tmp357; + fftw_real tmp281; + fftw_real tmp284; + fftw_real tmp285; + fftw_real tmp143; + fftw_real tmp226; + fftw_real tmp326; + fftw_real tmp356; + fftw_real tmp137; + fftw_real tmp227; + fftw_real tmp146; + fftw_real tmp230; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp3; + fftw_real tmp177; + fftw_real tmp66; + fftw_real tmp96; + fftw_real tmp6; + fftw_real tmp95; + fftw_real tmp69; + fftw_real tmp178; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp1; + fftw_real tmp2; + fftw_real tmp64; + fftw_real tmp65; + ASSERT_ALIGNED_DOUBLE; + tmp1 = c_re(input[0]); + tmp2 = c_re(input[16 * istride]); + tmp3 = tmp1 + tmp2; + tmp177 = tmp1 - tmp2; + tmp64 = c_im(input[0]); + tmp65 = c_im(input[16 * istride]); + tmp66 = tmp64 + tmp65; + tmp96 = tmp64 - tmp65; + } + { + fftw_real tmp4; + fftw_real tmp5; + fftw_real tmp67; + fftw_real tmp68; + ASSERT_ALIGNED_DOUBLE; + tmp4 = c_re(input[8 * istride]); + tmp5 = c_re(input[24 * istride]); + tmp6 = tmp4 + tmp5; + tmp95 = tmp4 - tmp5; + tmp67 = c_im(input[8 * istride]); + tmp68 = c_im(input[24 * istride]); + tmp69 = tmp67 + tmp68; + tmp178 = tmp67 - tmp68; + } + tmp7 = tmp3 + tmp6; + 
tmp339 = tmp3 - tmp6; + tmp70 = tmp66 + tmp69; + tmp313 = tmp66 - tmp69; + tmp97 = tmp95 + tmp96; + tmp215 = tmp96 - tmp95; + tmp179 = tmp177 - tmp178; + tmp241 = tmp177 + tmp178; + } + { + fftw_real tmp10; + fftw_real tmp98; + fftw_real tmp73; + fftw_real tmp99; + fftw_real tmp13; + fftw_real tmp102; + fftw_real tmp76; + fftw_real tmp101; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp8; + fftw_real tmp9; + fftw_real tmp71; + fftw_real tmp72; + ASSERT_ALIGNED_DOUBLE; + tmp8 = c_re(input[4 * istride]); + tmp9 = c_re(input[20 * istride]); + tmp10 = tmp8 + tmp9; + tmp98 = tmp8 - tmp9; + tmp71 = c_im(input[4 * istride]); + tmp72 = c_im(input[20 * istride]); + tmp73 = tmp71 + tmp72; + tmp99 = tmp71 - tmp72; + } + { + fftw_real tmp11; + fftw_real tmp12; + fftw_real tmp74; + fftw_real tmp75; + ASSERT_ALIGNED_DOUBLE; + tmp11 = c_re(input[28 * istride]); + tmp12 = c_re(input[12 * istride]); + tmp13 = tmp11 + tmp12; + tmp102 = tmp11 - tmp12; + tmp74 = c_im(input[28 * istride]); + tmp75 = c_im(input[12 * istride]); + tmp76 = tmp74 + tmp75; + tmp101 = tmp74 - tmp75; + } + tmp14 = tmp10 + tmp13; + tmp314 = tmp10 - tmp13; + tmp77 = tmp73 + tmp76; + tmp340 = tmp76 - tmp73; + { + fftw_real tmp180; + fftw_real tmp181; + fftw_real tmp100; + fftw_real tmp103; + ASSERT_ALIGNED_DOUBLE; + tmp180 = tmp98 - tmp99; + tmp181 = tmp102 + tmp101; + tmp182 = K707106781 * (tmp180 + tmp181); + tmp216 = K707106781 * (tmp180 - tmp181); + tmp100 = tmp98 + tmp99; + tmp103 = tmp101 - tmp102; + tmp104 = K707106781 * (tmp100 + tmp103); + tmp242 = K707106781 * (tmp103 - tmp100); + } + } + { + fftw_real tmp49; + fftw_real tmp149; + fftw_real tmp169; + fftw_real tmp288; + fftw_real tmp52; + fftw_real tmp166; + fftw_real tmp152; + fftw_real tmp289; + fftw_real tmp56; + fftw_real tmp154; + fftw_real tmp157; + fftw_real tmp291; + fftw_real tmp59; + fftw_real tmp159; + fftw_real tmp162; + fftw_real tmp292; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp47; + fftw_real tmp48; + fftw_real tmp167; + fftw_real tmp168; 
+ ASSERT_ALIGNED_DOUBLE; + tmp47 = c_re(input[31 * istride]); + tmp48 = c_re(input[15 * istride]); + tmp49 = tmp47 + tmp48; + tmp149 = tmp47 - tmp48; + tmp167 = c_im(input[31 * istride]); + tmp168 = c_im(input[15 * istride]); + tmp169 = tmp167 - tmp168; + tmp288 = tmp167 + tmp168; + } + { + fftw_real tmp50; + fftw_real tmp51; + fftw_real tmp150; + fftw_real tmp151; + ASSERT_ALIGNED_DOUBLE; + tmp50 = c_re(input[7 * istride]); + tmp51 = c_re(input[23 * istride]); + tmp52 = tmp50 + tmp51; + tmp166 = tmp50 - tmp51; + tmp150 = c_im(input[7 * istride]); + tmp151 = c_im(input[23 * istride]); + tmp152 = tmp150 - tmp151; + tmp289 = tmp150 + tmp151; + } + { + fftw_real tmp54; + fftw_real tmp55; + fftw_real tmp155; + fftw_real tmp156; + ASSERT_ALIGNED_DOUBLE; + tmp54 = c_re(input[3 * istride]); + tmp55 = c_re(input[19 * istride]); + tmp56 = tmp54 + tmp55; + tmp154 = tmp54 - tmp55; + tmp155 = c_im(input[3 * istride]); + tmp156 = c_im(input[19 * istride]); + tmp157 = tmp155 - tmp156; + tmp291 = tmp155 + tmp156; + } + { + fftw_real tmp57; + fftw_real tmp58; + fftw_real tmp160; + fftw_real tmp161; + ASSERT_ALIGNED_DOUBLE; + tmp57 = c_re(input[27 * istride]); + tmp58 = c_re(input[11 * istride]); + tmp59 = tmp57 + tmp58; + tmp159 = tmp57 - tmp58; + tmp160 = c_im(input[27 * istride]); + tmp161 = c_im(input[11 * istride]); + tmp162 = tmp160 - tmp161; + tmp292 = tmp160 + tmp161; + } + { + fftw_real tmp334; + fftw_real tmp335; + fftw_real tmp331; + fftw_real tmp332; + ASSERT_ALIGNED_DOUBLE; + tmp153 = tmp149 - tmp152; + tmp236 = tmp149 + tmp152; + tmp53 = tmp49 + tmp52; + tmp60 = tmp56 + tmp59; + tmp287 = tmp53 - tmp60; + tmp334 = tmp49 - tmp52; + tmp335 = tmp292 - tmp291; + tmp336 = tmp334 - tmp335; + tmp360 = tmp334 + tmp335; + tmp290 = tmp288 + tmp289; + tmp293 = tmp291 + tmp292; + tmp294 = tmp290 - tmp293; + tmp170 = tmp166 + tmp169; + tmp233 = tmp169 - tmp166; + tmp331 = tmp288 - tmp289; + tmp332 = tmp56 - tmp59; + tmp333 = tmp331 - tmp332; + tmp359 = tmp332 + tmp331; + { + 
fftw_real tmp158; + fftw_real tmp163; + fftw_real tmp171; + fftw_real tmp172; + ASSERT_ALIGNED_DOUBLE; + tmp158 = tmp154 - tmp157; + tmp163 = tmp159 + tmp162; + tmp164 = K707106781 * (tmp158 + tmp163); + tmp234 = K707106781 * (tmp158 - tmp163); + tmp171 = tmp154 + tmp157; + tmp172 = tmp162 - tmp159; + tmp173 = K707106781 * (tmp171 + tmp172); + tmp237 = K707106781 * (tmp172 - tmp171); + } + } + } + { + fftw_real tmp18; + fftw_real tmp106; + fftw_real tmp81; + fftw_real tmp110; + fftw_real tmp21; + fftw_real tmp109; + fftw_real tmp84; + fftw_real tmp107; + fftw_real tmp316; + fftw_real tmp317; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp16; + fftw_real tmp17; + fftw_real tmp79; + fftw_real tmp80; + ASSERT_ALIGNED_DOUBLE; + tmp16 = c_re(input[2 * istride]); + tmp17 = c_re(input[18 * istride]); + tmp18 = tmp16 + tmp17; + tmp106 = tmp16 - tmp17; + tmp79 = c_im(input[2 * istride]); + tmp80 = c_im(input[18 * istride]); + tmp81 = tmp79 + tmp80; + tmp110 = tmp79 - tmp80; + } + { + fftw_real tmp19; + fftw_real tmp20; + fftw_real tmp82; + fftw_real tmp83; + ASSERT_ALIGNED_DOUBLE; + tmp19 = c_re(input[10 * istride]); + tmp20 = c_re(input[26 * istride]); + tmp21 = tmp19 + tmp20; + tmp109 = tmp19 - tmp20; + tmp82 = c_im(input[10 * istride]); + tmp83 = c_im(input[26 * istride]); + tmp84 = tmp82 + tmp83; + tmp107 = tmp82 - tmp83; + } + tmp22 = tmp18 + tmp21; + tmp316 = tmp18 - tmp21; + tmp317 = tmp81 - tmp84; + tmp318 = tmp316 - tmp317; + tmp343 = tmp316 + tmp317; + tmp85 = tmp81 + tmp84; + { + fftw_real tmp108; + fftw_real tmp111; + fftw_real tmp218; + fftw_real tmp219; + ASSERT_ALIGNED_DOUBLE; + tmp108 = tmp106 - tmp107; + tmp111 = tmp109 + tmp110; + tmp112 = (K923879532 * tmp108) - (K382683432 * tmp111); + tmp185 = (K923879532 * tmp111) + (K382683432 * tmp108); + tmp218 = tmp106 + tmp107; + tmp219 = tmp110 - tmp109; + tmp220 = (K382683432 * tmp218) - (K923879532 * tmp219); + tmp245 = (K382683432 * tmp219) + (K923879532 * tmp218); + } + } + { + fftw_real tmp25; + fftw_real 
tmp116; + fftw_real tmp88; + fftw_real tmp114; + fftw_real tmp28; + fftw_real tmp113; + fftw_real tmp91; + fftw_real tmp117; + fftw_real tmp319; + fftw_real tmp320; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp23; + fftw_real tmp24; + fftw_real tmp86; + fftw_real tmp87; + ASSERT_ALIGNED_DOUBLE; + tmp23 = c_re(input[30 * istride]); + tmp24 = c_re(input[14 * istride]); + tmp25 = tmp23 + tmp24; + tmp116 = tmp23 - tmp24; + tmp86 = c_im(input[30 * istride]); + tmp87 = c_im(input[14 * istride]); + tmp88 = tmp86 + tmp87; + tmp114 = tmp86 - tmp87; + } + { + fftw_real tmp26; + fftw_real tmp27; + fftw_real tmp89; + fftw_real tmp90; + ASSERT_ALIGNED_DOUBLE; + tmp26 = c_re(input[6 * istride]); + tmp27 = c_re(input[22 * istride]); + tmp28 = tmp26 + tmp27; + tmp113 = tmp26 - tmp27; + tmp89 = c_im(input[6 * istride]); + tmp90 = c_im(input[22 * istride]); + tmp91 = tmp89 + tmp90; + tmp117 = tmp89 - tmp90; + } + tmp29 = tmp25 + tmp28; + tmp319 = tmp25 - tmp28; + tmp320 = tmp88 - tmp91; + tmp321 = tmp319 + tmp320; + tmp342 = tmp320 - tmp319; + tmp92 = tmp88 + tmp91; + { + fftw_real tmp115; + fftw_real tmp118; + fftw_real tmp221; + fftw_real tmp222; + ASSERT_ALIGNED_DOUBLE; + tmp115 = tmp113 + tmp114; + tmp118 = tmp116 - tmp117; + tmp119 = (K382683432 * tmp115) + (K923879532 * tmp118); + tmp184 = (K923879532 * tmp115) - (K382683432 * tmp118); + tmp221 = tmp114 - tmp113; + tmp222 = tmp116 + tmp117; + tmp223 = (K923879532 * tmp221) + (K382683432 * tmp222); + tmp244 = (K382683432 * tmp221) - (K923879532 * tmp222); + } + } + { + fftw_real tmp34; + fftw_real tmp122; + fftw_real tmp142; + fftw_real tmp279; + fftw_real tmp37; + fftw_real tmp139; + fftw_real tmp125; + fftw_real tmp280; + fftw_real tmp41; + fftw_real tmp127; + fftw_real tmp130; + fftw_real tmp282; + fftw_real tmp44; + fftw_real tmp132; + fftw_real tmp135; + fftw_real tmp283; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp32; + fftw_real tmp33; + fftw_real tmp140; + fftw_real tmp141; + ASSERT_ALIGNED_DOUBLE; + tmp32 = 
c_re(input[istride]); + tmp33 = c_re(input[17 * istride]); + tmp34 = tmp32 + tmp33; + tmp122 = tmp32 - tmp33; + tmp140 = c_im(input[istride]); + tmp141 = c_im(input[17 * istride]); + tmp142 = tmp140 - tmp141; + tmp279 = tmp140 + tmp141; + } + { + fftw_real tmp35; + fftw_real tmp36; + fftw_real tmp123; + fftw_real tmp124; + ASSERT_ALIGNED_DOUBLE; + tmp35 = c_re(input[9 * istride]); + tmp36 = c_re(input[25 * istride]); + tmp37 = tmp35 + tmp36; + tmp139 = tmp35 - tmp36; + tmp123 = c_im(input[9 * istride]); + tmp124 = c_im(input[25 * istride]); + tmp125 = tmp123 - tmp124; + tmp280 = tmp123 + tmp124; + } + { + fftw_real tmp39; + fftw_real tmp40; + fftw_real tmp128; + fftw_real tmp129; + ASSERT_ALIGNED_DOUBLE; + tmp39 = c_re(input[5 * istride]); + tmp40 = c_re(input[21 * istride]); + tmp41 = tmp39 + tmp40; + tmp127 = tmp39 - tmp40; + tmp128 = c_im(input[5 * istride]); + tmp129 = c_im(input[21 * istride]); + tmp130 = tmp128 - tmp129; + tmp282 = tmp128 + tmp129; + } + { + fftw_real tmp42; + fftw_real tmp43; + fftw_real tmp133; + fftw_real tmp134; + ASSERT_ALIGNED_DOUBLE; + tmp42 = c_re(input[29 * istride]); + tmp43 = c_re(input[13 * istride]); + tmp44 = tmp42 + tmp43; + tmp132 = tmp42 - tmp43; + tmp133 = c_im(input[29 * istride]); + tmp134 = c_im(input[13 * istride]); + tmp135 = tmp133 - tmp134; + tmp283 = tmp133 + tmp134; + } + { + fftw_real tmp327; + fftw_real tmp328; + fftw_real tmp324; + fftw_real tmp325; + ASSERT_ALIGNED_DOUBLE; + tmp126 = tmp122 - tmp125; + tmp229 = tmp122 + tmp125; + tmp38 = tmp34 + tmp37; + tmp45 = tmp41 + tmp44; + tmp278 = tmp38 - tmp45; + tmp327 = tmp34 - tmp37; + tmp328 = tmp283 - tmp282; + tmp329 = tmp327 - tmp328; + tmp357 = tmp327 + tmp328; + tmp281 = tmp279 + tmp280; + tmp284 = tmp282 + tmp283; + tmp285 = tmp281 - tmp284; + tmp143 = tmp139 + tmp142; + tmp226 = tmp142 - tmp139; + tmp324 = tmp279 - tmp280; + tmp325 = tmp41 - tmp44; + tmp326 = tmp324 - tmp325; + tmp356 = tmp325 + tmp324; + { + fftw_real tmp131; + fftw_real tmp136; + fftw_real 
tmp144; + fftw_real tmp145; + ASSERT_ALIGNED_DOUBLE; + tmp131 = tmp127 - tmp130; + tmp136 = tmp132 + tmp135; + tmp137 = K707106781 * (tmp131 + tmp136); + tmp227 = K707106781 * (tmp131 - tmp136); + tmp144 = tmp127 + tmp130; + tmp145 = tmp135 - tmp132; + tmp146 = K707106781 * (tmp144 + tmp145); + tmp230 = K707106781 * (tmp145 - tmp144); + } + } + } + { + fftw_real tmp277; + fftw_real tmp301; + fftw_real tmp304; + fftw_real tmp306; + fftw_real tmp296; + fftw_real tmp300; + fftw_real tmp299; + fftw_real tmp305; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp275; + fftw_real tmp276; + fftw_real tmp302; + fftw_real tmp303; + ASSERT_ALIGNED_DOUBLE; + tmp275 = tmp70 - tmp77; + tmp276 = tmp22 - tmp29; + tmp277 = tmp275 - tmp276; + tmp301 = tmp276 + tmp275; + tmp302 = tmp278 + tmp285; + tmp303 = tmp294 - tmp287; + tmp304 = K707106781 * (tmp302 + tmp303); + tmp306 = K707106781 * (tmp303 - tmp302); + } + { + fftw_real tmp286; + fftw_real tmp295; + fftw_real tmp297; + fftw_real tmp298; + ASSERT_ALIGNED_DOUBLE; + tmp286 = tmp278 - tmp285; + tmp295 = tmp287 + tmp294; + tmp296 = K707106781 * (tmp286 - tmp295); + tmp300 = K707106781 * (tmp286 + tmp295); + tmp297 = tmp7 - tmp14; + tmp298 = tmp92 - tmp85; + tmp299 = tmp297 + tmp298; + tmp305 = tmp297 - tmp298; + } + c_im(output[28 * ostride]) = tmp277 - tmp296; + c_im(output[12 * ostride]) = tmp277 + tmp296; + c_re(output[20 * ostride]) = tmp299 - tmp300; + c_re(output[4 * ostride]) = tmp299 + tmp300; + c_im(output[20 * ostride]) = tmp301 - tmp304; + c_im(output[4 * ostride]) = tmp301 + tmp304; + c_re(output[28 * ostride]) = tmp305 - tmp306; + c_re(output[12 * ostride]) = tmp305 + tmp306; + } + { + fftw_real tmp31; + fftw_real tmp311; + fftw_real tmp310; + fftw_real tmp312; + fftw_real tmp62; + fftw_real tmp63; + fftw_real tmp94; + fftw_real tmp307; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp15; + fftw_real tmp30; + fftw_real tmp308; + fftw_real tmp309; + ASSERT_ALIGNED_DOUBLE; + tmp15 = tmp7 + tmp14; + tmp30 = tmp22 + tmp29; + tmp31 = 
tmp15 + tmp30; + tmp311 = tmp15 - tmp30; + tmp308 = tmp281 + tmp284; + tmp309 = tmp290 + tmp293; + tmp310 = tmp308 + tmp309; + tmp312 = tmp309 - tmp308; + } + { + fftw_real tmp46; + fftw_real tmp61; + fftw_real tmp78; + fftw_real tmp93; + ASSERT_ALIGNED_DOUBLE; + tmp46 = tmp38 + tmp45; + tmp61 = tmp53 + tmp60; + tmp62 = tmp46 + tmp61; + tmp63 = tmp46 - tmp61; + tmp78 = tmp70 + tmp77; + tmp93 = tmp85 + tmp92; + tmp94 = tmp78 - tmp93; + tmp307 = tmp78 + tmp93; + } + c_re(output[16 * ostride]) = tmp31 - tmp62; + c_re(output[0]) = tmp31 + tmp62; + c_im(output[8 * ostride]) = tmp63 + tmp94; + c_im(output[24 * ostride]) = tmp94 - tmp63; + c_im(output[16 * ostride]) = tmp307 - tmp310; + c_im(output[0]) = tmp307 + tmp310; + c_re(output[24 * ostride]) = tmp311 - tmp312; + c_re(output[8 * ostride]) = tmp311 + tmp312; + } + { + fftw_real tmp121; + fftw_real tmp189; + fftw_real tmp187; + fftw_real tmp193; + fftw_real tmp148; + fftw_real tmp190; + fftw_real tmp175; + fftw_real tmp191; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp105; + fftw_real tmp120; + fftw_real tmp183; + fftw_real tmp186; + ASSERT_ALIGNED_DOUBLE; + tmp105 = tmp97 - tmp104; + tmp120 = tmp112 - tmp119; + tmp121 = tmp105 - tmp120; + tmp189 = tmp105 + tmp120; + tmp183 = tmp179 - tmp182; + tmp186 = tmp184 - tmp185; + tmp187 = tmp183 + tmp186; + tmp193 = tmp183 - tmp186; + } + { + fftw_real tmp138; + fftw_real tmp147; + fftw_real tmp165; + fftw_real tmp174; + ASSERT_ALIGNED_DOUBLE; + tmp138 = tmp126 - tmp137; + tmp147 = tmp143 - tmp146; + tmp148 = (K555570233 * tmp138) - (K831469612 * tmp147); + tmp190 = (K831469612 * tmp138) + (K555570233 * tmp147); + tmp165 = tmp153 - tmp164; + tmp174 = tmp170 - tmp173; + tmp175 = (K555570233 * tmp165) + (K831469612 * tmp174); + tmp191 = (K555570233 * tmp174) - (K831469612 * tmp165); + } + { + fftw_real tmp176; + fftw_real tmp188; + fftw_real tmp192; + fftw_real tmp194; + ASSERT_ALIGNED_DOUBLE; + tmp176 = tmp148 - tmp175; + c_im(output[29 * ostride]) = tmp121 - tmp176; + 
c_im(output[13 * ostride]) = tmp121 + tmp176; + tmp188 = tmp148 + tmp175; + c_re(output[21 * ostride]) = tmp187 - tmp188; + c_re(output[5 * ostride]) = tmp187 + tmp188; + tmp192 = tmp190 + tmp191; + c_im(output[21 * ostride]) = tmp189 - tmp192; + c_im(output[5 * ostride]) = tmp189 + tmp192; + tmp194 = tmp191 - tmp190; + c_re(output[29 * ostride]) = tmp193 - tmp194; + c_re(output[13 * ostride]) = tmp193 + tmp194; + } + } + { + fftw_real tmp197; + fftw_real tmp209; + fftw_real tmp207; + fftw_real tmp213; + fftw_real tmp200; + fftw_real tmp210; + fftw_real tmp203; + fftw_real tmp211; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp195; + fftw_real tmp196; + fftw_real tmp205; + fftw_real tmp206; + ASSERT_ALIGNED_DOUBLE; + tmp195 = tmp97 + tmp104; + tmp196 = tmp185 + tmp184; + tmp197 = tmp195 - tmp196; + tmp209 = tmp195 + tmp196; + tmp205 = tmp179 + tmp182; + tmp206 = tmp112 + tmp119; + tmp207 = tmp205 + tmp206; + tmp213 = tmp205 - tmp206; + } + { + fftw_real tmp198; + fftw_real tmp199; + fftw_real tmp201; + fftw_real tmp202; + ASSERT_ALIGNED_DOUBLE; + tmp198 = tmp126 + tmp137; + tmp199 = tmp143 + tmp146; + tmp200 = (K980785280 * tmp198) - (K195090322 * tmp199); + tmp210 = (K195090322 * tmp198) + (K980785280 * tmp199); + tmp201 = tmp153 + tmp164; + tmp202 = tmp170 + tmp173; + tmp203 = (K980785280 * tmp201) + (K195090322 * tmp202); + tmp211 = (K980785280 * tmp202) - (K195090322 * tmp201); + } + { + fftw_real tmp204; + fftw_real tmp208; + fftw_real tmp212; + fftw_real tmp214; + ASSERT_ALIGNED_DOUBLE; + tmp204 = tmp200 - tmp203; + c_im(output[25 * ostride]) = tmp197 - tmp204; + c_im(output[9 * ostride]) = tmp197 + tmp204; + tmp208 = tmp200 + tmp203; + c_re(output[17 * ostride]) = tmp207 - tmp208; + c_re(output[ostride]) = tmp207 + tmp208; + tmp212 = tmp210 + tmp211; + c_im(output[17 * ostride]) = tmp209 - tmp212; + c_im(output[ostride]) = tmp209 + tmp212; + tmp214 = tmp211 - tmp210; + c_re(output[25 * ostride]) = tmp213 - tmp214; + c_re(output[9 * ostride]) = tmp213 + tmp214; 
+ } + } + { + fftw_real tmp323; + fftw_real tmp347; + fftw_real tmp350; + fftw_real tmp352; + fftw_real tmp338; + fftw_real tmp346; + fftw_real tmp345; + fftw_real tmp351; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp315; + fftw_real tmp322; + fftw_real tmp348; + fftw_real tmp349; + ASSERT_ALIGNED_DOUBLE; + tmp315 = tmp313 - tmp314; + tmp322 = K707106781 * (tmp318 - tmp321); + tmp323 = tmp315 + tmp322; + tmp347 = tmp315 - tmp322; + tmp348 = (K382683432 * tmp329) - (K923879532 * tmp326); + tmp349 = (K923879532 * tmp333) + (K382683432 * tmp336); + tmp350 = tmp348 - tmp349; + tmp352 = tmp348 + tmp349; + } + { + fftw_real tmp330; + fftw_real tmp337; + fftw_real tmp341; + fftw_real tmp344; + ASSERT_ALIGNED_DOUBLE; + tmp330 = (K382683432 * tmp326) + (K923879532 * tmp329); + tmp337 = (K382683432 * tmp333) - (K923879532 * tmp336); + tmp338 = tmp330 + tmp337; + tmp346 = tmp337 - tmp330; + tmp341 = tmp339 - tmp340; + tmp344 = K707106781 * (tmp342 - tmp343); + tmp345 = tmp341 - tmp344; + tmp351 = tmp341 + tmp344; + } + c_im(output[22 * ostride]) = tmp323 - tmp338; + c_im(output[6 * ostride]) = tmp323 + tmp338; + c_re(output[30 * ostride]) = tmp345 - tmp346; + c_re(output[14 * ostride]) = tmp345 + tmp346; + c_im(output[30 * ostride]) = tmp347 - tmp350; + c_im(output[14 * ostride]) = tmp347 + tmp350; + c_re(output[22 * ostride]) = tmp351 - tmp352; + c_re(output[6 * ostride]) = tmp351 + tmp352; + } + { + fftw_real tmp355; + fftw_real tmp367; + fftw_real tmp370; + fftw_real tmp372; + fftw_real tmp362; + fftw_real tmp366; + fftw_real tmp365; + fftw_real tmp371; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp353; + fftw_real tmp354; + fftw_real tmp368; + fftw_real tmp369; + ASSERT_ALIGNED_DOUBLE; + tmp353 = tmp314 + tmp313; + tmp354 = K707106781 * (tmp343 + tmp342); + tmp355 = tmp353 + tmp354; + tmp367 = tmp353 - tmp354; + tmp368 = (K923879532 * tmp357) - (K382683432 * tmp356); + tmp369 = (K382683432 * tmp359) + (K923879532 * tmp360); + tmp370 = tmp368 - tmp369; + tmp372 = tmp368 + 
tmp369; + } + { + fftw_real tmp358; + fftw_real tmp361; + fftw_real tmp363; + fftw_real tmp364; + ASSERT_ALIGNED_DOUBLE; + tmp358 = (K923879532 * tmp356) + (K382683432 * tmp357); + tmp361 = (K923879532 * tmp359) - (K382683432 * tmp360); + tmp362 = tmp358 + tmp361; + tmp366 = tmp361 - tmp358; + tmp363 = tmp339 + tmp340; + tmp364 = K707106781 * (tmp318 + tmp321); + tmp365 = tmp363 - tmp364; + tmp371 = tmp363 + tmp364; + } + c_im(output[18 * ostride]) = tmp355 - tmp362; + c_im(output[2 * ostride]) = tmp355 + tmp362; + c_re(output[26 * ostride]) = tmp365 - tmp366; + c_re(output[10 * ostride]) = tmp365 + tmp366; + c_im(output[26 * ostride]) = tmp367 - tmp370; + c_im(output[10 * ostride]) = tmp367 + tmp370; + c_re(output[18 * ostride]) = tmp371 - tmp372; + c_re(output[2 * ostride]) = tmp371 + tmp372; + } + { + fftw_real tmp225; + fftw_real tmp249; + fftw_real tmp247; + fftw_real tmp253; + fftw_real tmp232; + fftw_real tmp250; + fftw_real tmp239; + fftw_real tmp251; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp217; + fftw_real tmp224; + fftw_real tmp243; + fftw_real tmp246; + ASSERT_ALIGNED_DOUBLE; + tmp217 = tmp215 - tmp216; + tmp224 = tmp220 - tmp223; + tmp225 = tmp217 + tmp224; + tmp249 = tmp217 - tmp224; + tmp243 = tmp241 - tmp242; + tmp246 = tmp244 - tmp245; + tmp247 = tmp243 - tmp246; + tmp253 = tmp243 + tmp246; + } + { + fftw_real tmp228; + fftw_real tmp231; + fftw_real tmp235; + fftw_real tmp238; + ASSERT_ALIGNED_DOUBLE; + tmp228 = tmp226 - tmp227; + tmp231 = tmp229 - tmp230; + tmp232 = (K195090322 * tmp228) + (K980785280 * tmp231); + tmp250 = (K195090322 * tmp231) - (K980785280 * tmp228); + tmp235 = tmp233 - tmp234; + tmp238 = tmp236 - tmp237; + tmp239 = (K195090322 * tmp235) - (K980785280 * tmp238); + tmp251 = (K980785280 * tmp235) + (K195090322 * tmp238); + } + { + fftw_real tmp240; + fftw_real tmp248; + fftw_real tmp252; + fftw_real tmp254; + ASSERT_ALIGNED_DOUBLE; + tmp240 = tmp232 + tmp239; + c_im(output[23 * ostride]) = tmp225 - tmp240; + c_im(output[7 * 
ostride]) = tmp225 + tmp240; + tmp248 = tmp239 - tmp232; + c_re(output[31 * ostride]) = tmp247 - tmp248; + c_re(output[15 * ostride]) = tmp247 + tmp248; + tmp252 = tmp250 - tmp251; + c_im(output[31 * ostride]) = tmp249 - tmp252; + c_im(output[15 * ostride]) = tmp249 + tmp252; + tmp254 = tmp250 + tmp251; + c_re(output[23 * ostride]) = tmp253 - tmp254; + c_re(output[7 * ostride]) = tmp253 + tmp254; + } + } + { + fftw_real tmp257; + fftw_real tmp269; + fftw_real tmp267; + fftw_real tmp273; + fftw_real tmp260; + fftw_real tmp270; + fftw_real tmp263; + fftw_real tmp271; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp255; + fftw_real tmp256; + fftw_real tmp265; + fftw_real tmp266; + ASSERT_ALIGNED_DOUBLE; + tmp255 = tmp215 + tmp216; + tmp256 = tmp245 + tmp244; + tmp257 = tmp255 + tmp256; + tmp269 = tmp255 - tmp256; + tmp265 = tmp241 + tmp242; + tmp266 = tmp220 + tmp223; + tmp267 = tmp265 - tmp266; + tmp273 = tmp265 + tmp266; + } + { + fftw_real tmp258; + fftw_real tmp259; + fftw_real tmp261; + fftw_real tmp262; + ASSERT_ALIGNED_DOUBLE; + tmp258 = tmp226 + tmp227; + tmp259 = tmp229 + tmp230; + tmp260 = (K831469612 * tmp258) + (K555570233 * tmp259); + tmp270 = (K831469612 * tmp259) - (K555570233 * tmp258); + tmp261 = tmp233 + tmp234; + tmp262 = tmp236 + tmp237; + tmp263 = (K831469612 * tmp261) - (K555570233 * tmp262); + tmp271 = (K555570233 * tmp261) + (K831469612 * tmp262); + } + { + fftw_real tmp264; + fftw_real tmp268; + fftw_real tmp272; + fftw_real tmp274; + ASSERT_ALIGNED_DOUBLE; + tmp264 = tmp260 + tmp263; + c_im(output[19 * ostride]) = tmp257 - tmp264; + c_im(output[3 * ostride]) = tmp257 + tmp264; + tmp268 = tmp263 - tmp260; + c_re(output[27 * ostride]) = tmp267 - tmp268; + c_re(output[11 * ostride]) = tmp267 + tmp268; + tmp272 = tmp270 - tmp271; + c_im(output[27 * ostride]) = tmp269 - tmp272; + c_im(output[11 * ostride]) = tmp269 + tmp272; + tmp274 = tmp270 + tmp271; + c_re(output[19 * ostride]) = tmp273 - tmp274; + c_re(output[3 * ostride]) = tmp273 + tmp274; + } + 
} +} + +fftw_codelet_desc fftwi_no_twiddle_32_desc = { + "fftwi_no_twiddle_32", + (void (*)()) fftwi_no_twiddle_32, + 32, + FFTW_BACKWARD, + FFTW_NOTW, + 716, + 0, + (const int *) 0, +}; diff --git a/src/fftw/fni_4.c b/src/fftw/fni_4.c new file mode 100644 index 0000000..f20547e --- /dev/null +++ b/src/fftw/fni_4.c @@ -0,0 +1,102 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Mon Mar 24 02:06:18 EST 2003 */ + +#include "fftw-int.h" +#include "fftw.h" + +/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddleinv 4 */ + +/* + * This function contains 16 FP additions, 0 FP multiplications, + * (or, 16 additions, 0 multiplications, 0 fused multiply/add), + * 12 stack variables, and 16 memory accesses + */ + +/* + * Generator Id's : + * $Id: fni_4.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fni_4.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fni_4.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + */ + +void fftwi_no_twiddle_4(const fftw_complex *input, fftw_complex *output, + int istride, int ostride) +{ + fftw_real tmp3; + fftw_real tmp11; + 
fftw_real tmp10; + fftw_real tmp15; + fftw_real tmp6; + fftw_real tmp7; + fftw_real tmp14; + fftw_real tmp16; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp1; + fftw_real tmp2; + fftw_real tmp8; + fftw_real tmp9; + ASSERT_ALIGNED_DOUBLE; + tmp1 = c_re(input[0]); + tmp2 = c_re(input[2 * istride]); + tmp3 = tmp1 + tmp2; + tmp11 = tmp1 - tmp2; + tmp8 = c_im(input[0]); + tmp9 = c_im(input[2 * istride]); + tmp10 = tmp8 - tmp9; + tmp15 = tmp8 + tmp9; + } + { + fftw_real tmp4; + fftw_real tmp5; + fftw_real tmp12; + fftw_real tmp13; + ASSERT_ALIGNED_DOUBLE; + tmp4 = c_re(input[istride]); + tmp5 = c_re(input[3 * istride]); + tmp6 = tmp4 + tmp5; + tmp7 = tmp4 - tmp5; + tmp12 = c_im(input[istride]); + tmp13 = c_im(input[3 * istride]); + tmp14 = tmp12 - tmp13; + tmp16 = tmp12 + tmp13; + } + c_re(output[2 * ostride]) = tmp3 - tmp6; + c_re(output[0]) = tmp3 + tmp6; + c_im(output[ostride]) = tmp7 + tmp10; + c_im(output[3 * ostride]) = tmp10 - tmp7; + c_re(output[ostride]) = tmp11 - tmp14; + c_re(output[3 * ostride]) = tmp11 + tmp14; + c_im(output[2 * ostride]) = tmp15 - tmp16; + c_im(output[0]) = tmp15 + tmp16; +} + +fftw_codelet_desc fftwi_no_twiddle_4_desc = { + "fftwi_no_twiddle_4", + (void (*)()) fftwi_no_twiddle_4, + 4, + FFTW_BACKWARD, + FFTW_NOTW, + 100, + 0, + (const int *) 0, +}; diff --git a/src/fftw/fni_5.c b/src/fftw/fni_5.c new file mode 100644 index 0000000..6da791f --- /dev/null +++ b/src/fftw/fni_5.c @@ -0,0 +1,150 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Mon Mar 24 02:06:18 EST 2003 */ + +#include "fftw-int.h" +#include "fftw.h" + +/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddleinv 5 */ + +/* + * This function contains 32 FP additions, 12 FP multiplications, + * (or, 26 additions, 6 multiplications, 6 fused multiply/add), + * 16 stack variables, and 20 memory accesses + */ +static const fftw_real K250000000 = +FFTW_KONST(+0.250000000000000000000000000000000000000000000); +static const fftw_real K951056516 = +FFTW_KONST(+0.951056516295153572116439333379382143405698634); +static const fftw_real K587785252 = +FFTW_KONST(+0.587785252292473129168705954639072768597652438); +static const fftw_real K559016994 = +FFTW_KONST(+0.559016994374947424102293417182819058860154590); + +/* + * Generator Id's : + * $Id: fni_5.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fni_5.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fni_5.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + */ + +void fftwi_no_twiddle_5(const fftw_complex *input, fftw_complex *output, + int istride, int ostride) +{ + fftw_real tmp1; + fftw_real tmp27; + fftw_real tmp8; + fftw_real tmp10; + fftw_real tmp21; + fftw_real tmp22; + fftw_real tmp14; + fftw_real tmp28; + fftw_real tmp26; + fftw_real tmp17; + ASSERT_ALIGNED_DOUBLE; + tmp1 = c_re(input[0]); + tmp27 = c_im(input[0]); + { + fftw_real tmp2; + fftw_real tmp3; + fftw_real tmp4; + fftw_real tmp5; + fftw_real tmp6; + fftw_real tmp7; + ASSERT_ALIGNED_DOUBLE; + tmp2 = c_re(input[istride]); + tmp3 = c_re(input[4 * istride]); + tmp4 = tmp2 + tmp3; + tmp5 = c_re(input[2 * 
istride]); + tmp6 = c_re(input[3 * istride]); + tmp7 = tmp5 + tmp6; + tmp8 = tmp4 + tmp7; + tmp10 = K559016994 * (tmp4 - tmp7); + tmp21 = tmp2 - tmp3; + tmp22 = tmp5 - tmp6; + } + { + fftw_real tmp12; + fftw_real tmp13; + fftw_real tmp24; + fftw_real tmp15; + fftw_real tmp16; + fftw_real tmp25; + ASSERT_ALIGNED_DOUBLE; + tmp12 = c_im(input[istride]); + tmp13 = c_im(input[4 * istride]); + tmp24 = tmp12 + tmp13; + tmp15 = c_im(input[2 * istride]); + tmp16 = c_im(input[3 * istride]); + tmp25 = tmp15 + tmp16; + tmp14 = tmp12 - tmp13; + tmp28 = tmp24 + tmp25; + tmp26 = K559016994 * (tmp24 - tmp25); + tmp17 = tmp15 - tmp16; + } + c_re(output[0]) = tmp1 + tmp8; + { + fftw_real tmp18; + fftw_real tmp20; + fftw_real tmp11; + fftw_real tmp19; + fftw_real tmp9; + ASSERT_ALIGNED_DOUBLE; + tmp18 = (K587785252 * tmp14) - (K951056516 * tmp17); + tmp20 = (K951056516 * tmp14) + (K587785252 * tmp17); + tmp9 = tmp1 - (K250000000 * tmp8); + tmp11 = tmp9 - tmp10; + tmp19 = tmp10 + tmp9; + c_re(output[2 * ostride]) = tmp11 - tmp18; + c_re(output[3 * ostride]) = tmp11 + tmp18; + c_re(output[ostride]) = tmp19 - tmp20; + c_re(output[4 * ostride]) = tmp19 + tmp20; + } + c_im(output[0]) = tmp28 + tmp27; + { + fftw_real tmp23; + fftw_real tmp31; + fftw_real tmp30; + fftw_real tmp32; + fftw_real tmp29; + ASSERT_ALIGNED_DOUBLE; + tmp23 = (K951056516 * tmp21) + (K587785252 * tmp22); + tmp31 = (K587785252 * tmp21) - (K951056516 * tmp22); + tmp29 = tmp27 - (K250000000 * tmp28); + tmp30 = tmp26 + tmp29; + tmp32 = tmp29 - tmp26; + c_im(output[ostride]) = tmp23 + tmp30; + c_im(output[4 * ostride]) = tmp30 - tmp23; + c_im(output[2 * ostride]) = tmp31 + tmp32; + c_im(output[3 * ostride]) = tmp32 - tmp31; + } +} + +fftw_codelet_desc fftwi_no_twiddle_5_desc = { + "fftwi_no_twiddle_5", + (void (*)()) fftwi_no_twiddle_5, + 5, + FFTW_BACKWARD, + FFTW_NOTW, + 122, + 0, + (const int *) 0, +}; diff --git a/src/fftw/fni_6.c b/src/fftw/fni_6.c new file mode 100644 index 0000000..3454303 --- /dev/null +++ 
b/src/fftw/fni_6.c @@ -0,0 +1,159 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Mon Mar 24 02:06:19 EST 2003 */ + +#include "fftw-int.h" +#include "fftw.h" + +/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddleinv 6 */ + +/* + * This function contains 36 FP additions, 8 FP multiplications, + * (or, 32 additions, 4 multiplications, 4 fused multiply/add), + * 20 stack variables, and 24 memory accesses + */ +static const fftw_real K866025403 = +FFTW_KONST(+0.866025403784438646763723170752936183471402627); +static const fftw_real K500000000 = +FFTW_KONST(+0.500000000000000000000000000000000000000000000); + +/* + * Generator Id's : + * $Id: fni_6.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fni_6.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fni_6.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + */ + +void fftwi_no_twiddle_6(const fftw_complex *input, fftw_complex *output, + int istride, int ostride) +{ + fftw_real tmp3; + fftw_real tmp11; + fftw_real tmp26; + fftw_real tmp29; + fftw_real tmp6; + fftw_real tmp12; + fftw_real tmp9; + fftw_real 
tmp13; + fftw_real tmp10; + fftw_real tmp14; + fftw_real tmp18; + fftw_real tmp31; + fftw_real tmp21; + fftw_real tmp30; + fftw_real tmp27; + fftw_real tmp32; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp1; + fftw_real tmp2; + fftw_real tmp24; + fftw_real tmp25; + ASSERT_ALIGNED_DOUBLE; + tmp1 = c_re(input[0]); + tmp2 = c_re(input[3 * istride]); + tmp3 = tmp1 - tmp2; + tmp11 = tmp1 + tmp2; + tmp24 = c_im(input[0]); + tmp25 = c_im(input[3 * istride]); + tmp26 = tmp24 - tmp25; + tmp29 = tmp24 + tmp25; + } + { + fftw_real tmp4; + fftw_real tmp5; + fftw_real tmp7; + fftw_real tmp8; + ASSERT_ALIGNED_DOUBLE; + tmp4 = c_re(input[2 * istride]); + tmp5 = c_re(input[5 * istride]); + tmp6 = tmp4 - tmp5; + tmp12 = tmp4 + tmp5; + tmp7 = c_re(input[4 * istride]); + tmp8 = c_re(input[istride]); + tmp9 = tmp7 - tmp8; + tmp13 = tmp7 + tmp8; + } + tmp10 = tmp6 + tmp9; + tmp14 = tmp12 + tmp13; + { + fftw_real tmp16; + fftw_real tmp17; + fftw_real tmp19; + fftw_real tmp20; + ASSERT_ALIGNED_DOUBLE; + tmp16 = c_im(input[4 * istride]); + tmp17 = c_im(input[istride]); + tmp18 = tmp16 - tmp17; + tmp31 = tmp16 + tmp17; + tmp19 = c_im(input[2 * istride]); + tmp20 = c_im(input[5 * istride]); + tmp21 = tmp19 - tmp20; + tmp30 = tmp19 + tmp20; + } + tmp27 = tmp21 + tmp18; + tmp32 = tmp30 + tmp31; + { + fftw_real tmp15; + fftw_real tmp22; + fftw_real tmp35; + fftw_real tmp36; + ASSERT_ALIGNED_DOUBLE; + c_re(output[3 * ostride]) = tmp3 + tmp10; + tmp15 = tmp3 - (K500000000 * tmp10); + tmp22 = K866025403 * (tmp18 - tmp21); + c_re(output[5 * ostride]) = tmp15 - tmp22; + c_re(output[ostride]) = tmp15 + tmp22; + c_re(output[0]) = tmp11 + tmp14; + tmp35 = tmp11 - (K500000000 * tmp14); + tmp36 = K866025403 * (tmp31 - tmp30); + c_re(output[2 * ostride]) = tmp35 - tmp36; + c_re(output[4 * ostride]) = tmp35 + tmp36; + } + { + fftw_real tmp23; + fftw_real tmp28; + fftw_real tmp33; + fftw_real tmp34; + ASSERT_ALIGNED_DOUBLE; + c_im(output[3 * ostride]) = tmp26 + tmp27; + tmp23 = K866025403 * (tmp6 - tmp9); + 
tmp28 = tmp26 - (K500000000 * tmp27); + c_im(output[ostride]) = tmp23 + tmp28; + c_im(output[5 * ostride]) = tmp28 - tmp23; + c_im(output[0]) = tmp29 + tmp32; + tmp33 = tmp29 - (K500000000 * tmp32); + tmp34 = K866025403 * (tmp12 - tmp13); + c_im(output[2 * ostride]) = tmp33 - tmp34; + c_im(output[4 * ostride]) = tmp34 + tmp33; + } +} + +fftw_codelet_desc fftwi_no_twiddle_6_desc = { + "fftwi_no_twiddle_6", + (void (*)()) fftwi_no_twiddle_6, + 6, + FFTW_BACKWARD, + FFTW_NOTW, + 144, + 0, + (const int *) 0, +}; diff --git a/src/fftw/fni_64.c b/src/fftw/fni_64.c new file mode 100644 index 0000000..10e8cf1 --- /dev/null +++ b/src/fftw/fni_64.c @@ -0,0 +1,2464 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Mon Mar 24 02:06:51 EST 2003 */ + +#include "fftw-int.h" +#include "fftw.h" + +/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddleinv 64 */ + +/* + * This function contains 912 FP additions, 248 FP multiplications, + * (or, 808 additions, 144 multiplications, 104 fused multiply/add), + * 156 stack variables, and 256 memory accesses + */ /* Constant table used by the codelet below. Each literal is the sine or cosine of a multiple of pi/32, and each name is the letter K followed by the first nine decimal digits of the value (truncated, not rounded). The angle noted after each entry was identified from the numeric value. */ +static const fftw_real K195090322 = +FFTW_KONST(+0.195090322016128267848284868477022240927691618); /* sin(pi/16) */ +static const fftw_real K980785280 = +FFTW_KONST(+0.980785280403230449126182236134239036973933731); /* cos(pi/16) */ +static const fftw_real K773010453 = +FFTW_KONST(+0.773010453362736960810906609758469800971041293); /* cos(7*pi/32) */ +static const fftw_real K634393284 = +FFTW_KONST(+0.634393284163645498215171613225493370675687095); /* sin(7*pi/32) */ +static const fftw_real K098017140 = +FFTW_KONST(+0.098017140329560601994195563888641845861136673); /* sin(pi/32) */ +static const fftw_real K995184726 = +FFTW_KONST(+0.995184726672196886244836953109479921575474869); /* cos(pi/32) */ +static const fftw_real K831469612 = +FFTW_KONST(+0.831469612302545237078788377617905756738560812); /* cos(3*pi/16) */ +static const fftw_real K555570233 = +FFTW_KONST(+0.555570233019602224742830813948532874374937191); /* sin(3*pi/16) */ +static const fftw_real K956940335 = +FFTW_KONST(+0.956940335732208864935797886980269969482849206); /* cos(3*pi/32) */ +static const fftw_real K290284677 = +FFTW_KONST(+0.290284677254462367636192375817395274691476278); /* sin(3*pi/32) */ +static const fftw_real K471396736 = +FFTW_KONST(+0.471396736825997648556387625905254377657460319); /* sin(5*pi/32) */ +static const fftw_real K881921264 = +FFTW_KONST(+0.881921264348355029712756863660388349508442621); /* cos(5*pi/32) */ +static const fftw_real K382683432 = 
+FFTW_KONST(+0.382683432365089771728459984030398866761344562); /* sin(pi/8) */ +static const fftw_real K923879532 = +FFTW_KONST(+0.923879532511286756128183189396788286822416626); /* cos(pi/8) */ +static const fftw_real K707106781 = +FFTW_KONST(+0.707106781186547524400844362104849039284835938); /* cos(pi/4), i.e. sqrt(2)/2 */ + +/* + * Generator Id's : + * $Id: fni_64.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fni_64.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fni_64.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + */ + +void fftwi_no_twiddle_64(const fftw_complex *input, fftw_complex *output, + int istride, int ostride) +{ + fftw_real tmp193; + fftw_real tmp471; + fftw_real tmp15; + fftw_real tmp879; + fftw_real tmp719; + fftw_real tmp781; + fftw_real tmp142; + fftw_real tmp853; + fftw_real tmp371; + fftw_real tmp537; + fftw_real tmp637; + fftw_real tmp755; + fftw_real tmp200; + fftw_real tmp538; + fftw_real tmp374; + fftw_real tmp472; + fftw_real tmp109; + fftw_real tmp874; + fftw_real tmp693; + fftw_real tmp776; + fftw_real tmp830; + fftw_real tmp871; + fftw_real tmp710; + fftw_real tmp773; + fftw_real tmp329; + fftw_real tmp432; + fftw_real tmp519; + fftw_real tmp596; + fftw_real tmp362; + fftw_real tmp429; + fftw_real tmp530; + fftw_real tmp593; + fftw_real tmp30; + fftw_real tmp854; + fftw_real tmp640; + fftw_real tmp720; + fftw_real tmp157; + fftw_real tmp880; + fftw_real tmp643; + fftw_real tmp721; + fftw_real tmp208; + fftw_real tmp377; + fftw_real tmp476; + fftw_real tmp541; + fftw_real tmp215; + fftw_real tmp376; + fftw_real tmp479; + fftw_real tmp540; + fftw_real tmp124; + fftw_real tmp872; + fftw_real tmp365; + fftw_real tmp433; + fftw_real tmp352; + fftw_real tmp430; + fftw_real tmp833; + fftw_real tmp875; + fftw_real tmp526; + fftw_real tmp594; + fftw_real tmp533; + fftw_real tmp597; + fftw_real tmp704; + fftw_real tmp774; + fftw_real tmp713; + fftw_real tmp777; + fftw_real tmp46; + fftw_real tmp856; + fftw_real tmp648; + fftw_real tmp758; + fftw_real tmp173; + fftw_real tmp857; + fftw_real tmp651; + fftw_real 
tmp759; + fftw_real tmp228; + fftw_real tmp414; + fftw_real tmp484; + fftw_real tmp578; + fftw_real tmp235; + fftw_real tmp415; + fftw_real tmp487; + fftw_real tmp579; + fftw_real tmp78; + fftw_real tmp867; + fftw_real tmp666; + fftw_real tmp769; + fftw_real tmp821; + fftw_real tmp864; + fftw_real tmp683; + fftw_real tmp766; + fftw_real tmp274; + fftw_real tmp425; + fftw_real tmp500; + fftw_real tmp589; + fftw_real tmp307; + fftw_real tmp422; + fftw_real tmp511; + fftw_real tmp586; + fftw_real tmp61; + fftw_real tmp859; + fftw_real tmp655; + fftw_real tmp761; + fftw_real tmp188; + fftw_real tmp860; + fftw_real tmp658; + fftw_real tmp762; + fftw_real tmp247; + fftw_real tmp417; + fftw_real tmp491; + fftw_real tmp581; + fftw_real tmp254; + fftw_real tmp418; + fftw_real tmp494; + fftw_real tmp582; + fftw_real tmp93; + fftw_real tmp865; + fftw_real tmp310; + fftw_real tmp426; + fftw_real tmp297; + fftw_real tmp423; + fftw_real tmp824; + fftw_real tmp868; + fftw_real tmp507; + fftw_real tmp587; + fftw_real tmp514; + fftw_real tmp590; + fftw_real tmp677; + fftw_real tmp767; + fftw_real tmp686; + fftw_real tmp770; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp3; + fftw_real tmp369; + fftw_real tmp130; + fftw_real tmp192; + fftw_real tmp6; + fftw_real tmp191; + fftw_real tmp133; + fftw_real tmp370; + fftw_real tmp10; + fftw_real tmp194; + fftw_real tmp137; + fftw_real tmp195; + fftw_real tmp13; + fftw_real tmp198; + fftw_real tmp140; + fftw_real tmp197; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp1; + fftw_real tmp2; + fftw_real tmp128; + fftw_real tmp129; + ASSERT_ALIGNED_DOUBLE; + tmp1 = c_re(input[0]); + tmp2 = c_re(input[32 * istride]); + tmp3 = tmp1 + tmp2; + tmp369 = tmp1 - tmp2; + tmp128 = c_im(input[0]); + tmp129 = c_im(input[32 * istride]); + tmp130 = tmp128 + tmp129; + tmp192 = tmp128 - tmp129; + } + { + fftw_real tmp4; + fftw_real tmp5; + fftw_real tmp131; + fftw_real tmp132; + ASSERT_ALIGNED_DOUBLE; + tmp4 = c_re(input[16 * istride]); + tmp5 = c_re(input[48 * 
istride]); + tmp6 = tmp4 + tmp5; + tmp191 = tmp4 - tmp5; + tmp131 = c_im(input[16 * istride]); + tmp132 = c_im(input[48 * istride]); + tmp133 = tmp131 + tmp132; + tmp370 = tmp131 - tmp132; + } + { + fftw_real tmp8; + fftw_real tmp9; + fftw_real tmp135; + fftw_real tmp136; + ASSERT_ALIGNED_DOUBLE; + tmp8 = c_re(input[8 * istride]); + tmp9 = c_re(input[40 * istride]); + tmp10 = tmp8 + tmp9; + tmp194 = tmp8 - tmp9; + tmp135 = c_im(input[8 * istride]); + tmp136 = c_im(input[40 * istride]); + tmp137 = tmp135 + tmp136; + tmp195 = tmp135 - tmp136; + } + { + fftw_real tmp11; + fftw_real tmp12; + fftw_real tmp138; + fftw_real tmp139; + ASSERT_ALIGNED_DOUBLE; + tmp11 = c_re(input[56 * istride]); + tmp12 = c_re(input[24 * istride]); + tmp13 = tmp11 + tmp12; + tmp198 = tmp11 - tmp12; + tmp138 = c_im(input[56 * istride]); + tmp139 = c_im(input[24 * istride]); + tmp140 = tmp138 + tmp139; + tmp197 = tmp138 - tmp139; + } + { + fftw_real tmp7; + fftw_real tmp14; + fftw_real tmp635; + fftw_real tmp636; + ASSERT_ALIGNED_DOUBLE; + tmp193 = tmp191 + tmp192; + tmp471 = tmp192 - tmp191; + tmp7 = tmp3 + tmp6; + tmp14 = tmp10 + tmp13; + tmp15 = tmp7 + tmp14; + tmp879 = tmp7 - tmp14; + { + fftw_real tmp717; + fftw_real tmp718; + fftw_real tmp134; + fftw_real tmp141; + ASSERT_ALIGNED_DOUBLE; + tmp717 = tmp3 - tmp6; + tmp718 = tmp140 - tmp137; + tmp719 = tmp717 + tmp718; + tmp781 = tmp717 - tmp718; + tmp134 = tmp130 + tmp133; + tmp141 = tmp137 + tmp140; + tmp142 = tmp134 + tmp141; + tmp853 = tmp134 - tmp141; + } + tmp371 = tmp369 - tmp370; + tmp537 = tmp369 + tmp370; + tmp635 = tmp10 - tmp13; + tmp636 = tmp130 - tmp133; + tmp637 = tmp635 + tmp636; + tmp755 = tmp636 - tmp635; + { + fftw_real tmp196; + fftw_real tmp199; + fftw_real tmp372; + fftw_real tmp373; + ASSERT_ALIGNED_DOUBLE; + tmp196 = tmp194 + tmp195; + tmp199 = tmp197 - tmp198; + tmp200 = K707106781 * (tmp196 + tmp199); + tmp538 = K707106781 * (tmp199 - tmp196); + tmp372 = tmp194 - tmp195; + tmp373 = tmp198 + tmp197; + tmp374 = 
K707106781 * (tmp372 + tmp373); + tmp472 = K707106781 * (tmp372 - tmp373); + } + } + } + { + fftw_real tmp97; + fftw_real tmp313; + fftw_real tmp357; + fftw_real tmp707; + fftw_real tmp100; + fftw_real tmp354; + fftw_real tmp316; + fftw_real tmp708; + fftw_real tmp107; + fftw_real tmp690; + fftw_real tmp327; + fftw_real tmp360; + fftw_real tmp104; + fftw_real tmp691; + fftw_real tmp322; + fftw_real tmp359; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp95; + fftw_real tmp96; + fftw_real tmp314; + fftw_real tmp315; + ASSERT_ALIGNED_DOUBLE; + tmp95 = c_re(input[63 * istride]); + tmp96 = c_re(input[31 * istride]); + tmp97 = tmp95 + tmp96; + tmp313 = tmp95 - tmp96; + { + fftw_real tmp355; + fftw_real tmp356; + fftw_real tmp98; + fftw_real tmp99; + ASSERT_ALIGNED_DOUBLE; + tmp355 = c_im(input[63 * istride]); + tmp356 = c_im(input[31 * istride]); + tmp357 = tmp355 - tmp356; + tmp707 = tmp355 + tmp356; + tmp98 = c_re(input[15 * istride]); + tmp99 = c_re(input[47 * istride]); + tmp100 = tmp98 + tmp99; + tmp354 = tmp98 - tmp99; + } + tmp314 = c_im(input[15 * istride]); + tmp315 = c_im(input[47 * istride]); + tmp316 = tmp314 - tmp315; + tmp708 = tmp314 + tmp315; + { + fftw_real tmp105; + fftw_real tmp106; + fftw_real tmp323; + fftw_real tmp324; + fftw_real tmp325; + fftw_real tmp326; + ASSERT_ALIGNED_DOUBLE; + tmp105 = c_re(input[55 * istride]); + tmp106 = c_re(input[23 * istride]); + tmp323 = tmp105 - tmp106; + tmp324 = c_im(input[55 * istride]); + tmp325 = c_im(input[23 * istride]); + tmp326 = tmp324 - tmp325; + tmp107 = tmp105 + tmp106; + tmp690 = tmp324 + tmp325; + tmp327 = tmp323 + tmp326; + tmp360 = tmp326 - tmp323; + } + { + fftw_real tmp102; + fftw_real tmp103; + fftw_real tmp318; + fftw_real tmp319; + fftw_real tmp320; + fftw_real tmp321; + ASSERT_ALIGNED_DOUBLE; + tmp102 = c_re(input[7 * istride]); + tmp103 = c_re(input[39 * istride]); + tmp318 = tmp102 - tmp103; + tmp319 = c_im(input[7 * istride]); + tmp320 = c_im(input[39 * istride]); + tmp321 = tmp319 - tmp320; + 
tmp104 = tmp102 + tmp103; + tmp691 = tmp319 + tmp320; + tmp322 = tmp318 - tmp321; + tmp359 = tmp318 + tmp321; + } + } + { + fftw_real tmp101; + fftw_real tmp108; + fftw_real tmp689; + fftw_real tmp692; + ASSERT_ALIGNED_DOUBLE; + tmp101 = tmp97 + tmp100; + tmp108 = tmp104 + tmp107; + tmp109 = tmp101 + tmp108; + tmp874 = tmp101 - tmp108; + tmp689 = tmp97 - tmp100; + tmp692 = tmp690 - tmp691; + tmp693 = tmp689 + tmp692; + tmp776 = tmp689 - tmp692; + } + { + fftw_real tmp828; + fftw_real tmp829; + fftw_real tmp706; + fftw_real tmp709; + ASSERT_ALIGNED_DOUBLE; + tmp828 = tmp707 + tmp708; + tmp829 = tmp691 + tmp690; + tmp830 = tmp828 + tmp829; + tmp871 = tmp828 - tmp829; + tmp706 = tmp104 - tmp107; + tmp709 = tmp707 - tmp708; + tmp710 = tmp706 + tmp709; + tmp773 = tmp709 - tmp706; + } + { + fftw_real tmp317; + fftw_real tmp328; + fftw_real tmp517; + fftw_real tmp518; + ASSERT_ALIGNED_DOUBLE; + tmp317 = tmp313 - tmp316; + tmp328 = K707106781 * (tmp322 + tmp327); + tmp329 = tmp317 - tmp328; + tmp432 = tmp317 + tmp328; + tmp517 = tmp313 + tmp316; + tmp518 = K707106781 * (tmp360 - tmp359); + tmp519 = tmp517 - tmp518; + tmp596 = tmp517 + tmp518; + } + { + fftw_real tmp358; + fftw_real tmp361; + fftw_real tmp528; + fftw_real tmp529; + ASSERT_ALIGNED_DOUBLE; + tmp358 = tmp354 + tmp357; + tmp361 = K707106781 * (tmp359 + tmp360); + tmp362 = tmp358 - tmp361; + tmp429 = tmp358 + tmp361; + tmp528 = tmp357 - tmp354; + tmp529 = K707106781 * (tmp322 - tmp327); + tmp530 = tmp528 - tmp529; + tmp593 = tmp528 + tmp529; + } + } + { + fftw_real tmp18; + fftw_real tmp202; + fftw_real tmp145; + fftw_real tmp206; + fftw_real tmp21; + fftw_real tmp205; + fftw_real tmp148; + fftw_real tmp203; + fftw_real tmp25; + fftw_real tmp212; + fftw_real tmp152; + fftw_real tmp210; + fftw_real tmp28; + fftw_real tmp209; + fftw_real tmp155; + fftw_real tmp213; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp16; + fftw_real tmp17; + fftw_real tmp143; + fftw_real tmp144; + ASSERT_ALIGNED_DOUBLE; + tmp16 = 
c_re(input[4 * istride]); + tmp17 = c_re(input[36 * istride]); + tmp18 = tmp16 + tmp17; + tmp202 = tmp16 - tmp17; + tmp143 = c_im(input[4 * istride]); + tmp144 = c_im(input[36 * istride]); + tmp145 = tmp143 + tmp144; + tmp206 = tmp143 - tmp144; + } + { + fftw_real tmp19; + fftw_real tmp20; + fftw_real tmp146; + fftw_real tmp147; + ASSERT_ALIGNED_DOUBLE; + tmp19 = c_re(input[20 * istride]); + tmp20 = c_re(input[52 * istride]); + tmp21 = tmp19 + tmp20; + tmp205 = tmp19 - tmp20; + tmp146 = c_im(input[20 * istride]); + tmp147 = c_im(input[52 * istride]); + tmp148 = tmp146 + tmp147; + tmp203 = tmp146 - tmp147; + } + { + fftw_real tmp23; + fftw_real tmp24; + fftw_real tmp150; + fftw_real tmp151; + ASSERT_ALIGNED_DOUBLE; + tmp23 = c_re(input[60 * istride]); + tmp24 = c_re(input[28 * istride]); + tmp25 = tmp23 + tmp24; + tmp212 = tmp23 - tmp24; + tmp150 = c_im(input[60 * istride]); + tmp151 = c_im(input[28 * istride]); + tmp152 = tmp150 + tmp151; + tmp210 = tmp150 - tmp151; + } + { + fftw_real tmp26; + fftw_real tmp27; + fftw_real tmp153; + fftw_real tmp154; + ASSERT_ALIGNED_DOUBLE; + tmp26 = c_re(input[12 * istride]); + tmp27 = c_re(input[44 * istride]); + tmp28 = tmp26 + tmp27; + tmp209 = tmp26 - tmp27; + tmp153 = c_im(input[12 * istride]); + tmp154 = c_im(input[44 * istride]); + tmp155 = tmp153 + tmp154; + tmp213 = tmp153 - tmp154; + } + { + fftw_real tmp22; + fftw_real tmp29; + fftw_real tmp638; + fftw_real tmp639; + ASSERT_ALIGNED_DOUBLE; + tmp22 = tmp18 + tmp21; + tmp29 = tmp25 + tmp28; + tmp30 = tmp22 + tmp29; + tmp854 = tmp22 - tmp29; + tmp638 = tmp18 - tmp21; + tmp639 = tmp145 - tmp148; + tmp640 = tmp638 + tmp639; + tmp720 = tmp638 - tmp639; + } + { + fftw_real tmp149; + fftw_real tmp156; + fftw_real tmp641; + fftw_real tmp642; + ASSERT_ALIGNED_DOUBLE; + tmp149 = tmp145 + tmp148; + tmp156 = tmp152 + tmp155; + tmp157 = tmp149 + tmp156; + tmp880 = tmp156 - tmp149; + tmp641 = tmp152 - tmp155; + tmp642 = tmp25 - tmp28; + tmp643 = tmp641 - tmp642; + tmp721 = tmp642 + 
tmp641; + } + { + fftw_real tmp204; + fftw_real tmp207; + fftw_real tmp474; + fftw_real tmp475; + ASSERT_ALIGNED_DOUBLE; + tmp204 = tmp202 - tmp203; + tmp207 = tmp205 + tmp206; + tmp208 = (K923879532 * tmp204) - (K382683432 * tmp207); + tmp377 = (K923879532 * tmp207) + (K382683432 * tmp204); + tmp474 = tmp202 + tmp203; + tmp475 = tmp206 - tmp205; + tmp476 = (K382683432 * tmp474) - (K923879532 * tmp475); + tmp541 = (K382683432 * tmp475) + (K923879532 * tmp474); + } + { + fftw_real tmp211; + fftw_real tmp214; + fftw_real tmp477; + fftw_real tmp478; + ASSERT_ALIGNED_DOUBLE; + tmp211 = tmp209 + tmp210; + tmp214 = tmp212 - tmp213; + tmp215 = (K382683432 * tmp211) + (K923879532 * tmp214); + tmp376 = (K923879532 * tmp211) - (K382683432 * tmp214); + tmp477 = tmp210 - tmp209; + tmp478 = tmp212 + tmp213; + tmp479 = (K923879532 * tmp477) + (K382683432 * tmp478); + tmp540 = (K382683432 * tmp477) - (K923879532 * tmp478); + } + } + { + fftw_real tmp112; + fftw_real tmp695; + fftw_real tmp115; + fftw_real tmp696; + fftw_real tmp345; + fftw_real tmp523; + fftw_real tmp350; + fftw_real tmp524; + fftw_real tmp697; + fftw_real tmp694; + fftw_real tmp119; + fftw_real tmp700; + fftw_real tmp122; + fftw_real tmp701; + fftw_real tmp334; + fftw_real tmp520; + fftw_real tmp339; + fftw_real tmp521; + fftw_real tmp702; + fftw_real tmp699; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp346; + fftw_real tmp344; + fftw_real tmp341; + fftw_real tmp349; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp110; + fftw_real tmp111; + fftw_real tmp342; + fftw_real tmp343; + ASSERT_ALIGNED_DOUBLE; + tmp110 = c_re(input[3 * istride]); + tmp111 = c_re(input[35 * istride]); + tmp112 = tmp110 + tmp111; + tmp346 = tmp110 - tmp111; + tmp342 = c_im(input[3 * istride]); + tmp343 = c_im(input[35 * istride]); + tmp344 = tmp342 - tmp343; + tmp695 = tmp342 + tmp343; + } + { + fftw_real tmp113; + fftw_real tmp114; + fftw_real tmp347; + fftw_real tmp348; + ASSERT_ALIGNED_DOUBLE; + tmp113 = c_re(input[19 * istride]); + tmp114 
= c_re(input[51 * istride]); + tmp115 = tmp113 + tmp114; + tmp341 = tmp113 - tmp114; + tmp347 = c_im(input[19 * istride]); + tmp348 = c_im(input[51 * istride]); + tmp349 = tmp347 - tmp348; + tmp696 = tmp347 + tmp348; + } + tmp345 = tmp341 + tmp344; + tmp523 = tmp344 - tmp341; + tmp350 = tmp346 - tmp349; + tmp524 = tmp346 + tmp349; + tmp697 = tmp695 - tmp696; + tmp694 = tmp112 - tmp115; + } + { + fftw_real tmp335; + fftw_real tmp333; + fftw_real tmp330; + fftw_real tmp338; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp117; + fftw_real tmp118; + fftw_real tmp331; + fftw_real tmp332; + ASSERT_ALIGNED_DOUBLE; + tmp117 = c_re(input[59 * istride]); + tmp118 = c_re(input[27 * istride]); + tmp119 = tmp117 + tmp118; + tmp335 = tmp117 - tmp118; + tmp331 = c_im(input[59 * istride]); + tmp332 = c_im(input[27 * istride]); + tmp333 = tmp331 - tmp332; + tmp700 = tmp331 + tmp332; + } + { + fftw_real tmp120; + fftw_real tmp121; + fftw_real tmp336; + fftw_real tmp337; + ASSERT_ALIGNED_DOUBLE; + tmp120 = c_re(input[11 * istride]); + tmp121 = c_re(input[43 * istride]); + tmp122 = tmp120 + tmp121; + tmp330 = tmp120 - tmp121; + tmp336 = c_im(input[11 * istride]); + tmp337 = c_im(input[43 * istride]); + tmp338 = tmp336 - tmp337; + tmp701 = tmp336 + tmp337; + } + tmp334 = tmp330 + tmp333; + tmp520 = tmp333 - tmp330; + tmp339 = tmp335 - tmp338; + tmp521 = tmp335 + tmp338; + tmp702 = tmp700 - tmp701; + tmp699 = tmp119 - tmp122; + } + { + fftw_real tmp116; + fftw_real tmp123; + fftw_real tmp363; + fftw_real tmp364; + ASSERT_ALIGNED_DOUBLE; + tmp116 = tmp112 + tmp115; + tmp123 = tmp119 + tmp122; + tmp124 = tmp116 + tmp123; + tmp872 = tmp116 - tmp123; + tmp363 = (K923879532 * tmp350) - (K382683432 * tmp345); + tmp364 = (K382683432 * tmp334) + (K923879532 * tmp339); + tmp365 = tmp363 - tmp364; + tmp433 = tmp363 + tmp364; + } + { + fftw_real tmp340; + fftw_real tmp351; + fftw_real tmp831; + fftw_real tmp832; + ASSERT_ALIGNED_DOUBLE; + tmp340 = (K923879532 * tmp334) - (K382683432 * tmp339); + 
tmp351 = (K923879532 * tmp345) + (K382683432 * tmp350); + tmp352 = tmp340 - tmp351; + tmp430 = tmp351 + tmp340; + tmp831 = tmp695 + tmp696; + tmp832 = tmp700 + tmp701; + tmp833 = tmp831 + tmp832; + tmp875 = tmp832 - tmp831; + } + { + fftw_real tmp522; + fftw_real tmp525; + fftw_real tmp531; + fftw_real tmp532; + ASSERT_ALIGNED_DOUBLE; + tmp522 = (K382683432 * tmp520) - (K923879532 * tmp521); + tmp525 = (K382683432 * tmp523) + (K923879532 * tmp524); + tmp526 = tmp522 - tmp525; + tmp594 = tmp525 + tmp522; + tmp531 = (K382683432 * tmp524) - (K923879532 * tmp523); + tmp532 = (K923879532 * tmp520) + (K382683432 * tmp521); + tmp533 = tmp531 - tmp532; + tmp597 = tmp531 + tmp532; + } + { + fftw_real tmp698; + fftw_real tmp703; + fftw_real tmp711; + fftw_real tmp712; + ASSERT_ALIGNED_DOUBLE; + tmp698 = tmp694 - tmp697; + tmp703 = tmp699 + tmp702; + tmp704 = K707106781 * (tmp698 + tmp703); + tmp774 = K707106781 * (tmp698 - tmp703); + tmp711 = tmp694 + tmp697; + tmp712 = tmp702 - tmp699; + tmp713 = K707106781 * (tmp711 + tmp712); + tmp777 = K707106781 * (tmp712 - tmp711); + } + } + { + fftw_real tmp34; + fftw_real tmp218; + fftw_real tmp161; + fftw_real tmp230; + fftw_real tmp37; + fftw_real tmp229; + fftw_real tmp164; + fftw_real tmp219; + fftw_real tmp44; + fftw_real tmp233; + fftw_real tmp226; + fftw_real tmp171; + fftw_real tmp41; + fftw_real tmp232; + fftw_real tmp223; + fftw_real tmp168; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp32; + fftw_real tmp33; + fftw_real tmp162; + fftw_real tmp163; + ASSERT_ALIGNED_DOUBLE; + tmp32 = c_re(input[2 * istride]); + tmp33 = c_re(input[34 * istride]); + tmp34 = tmp32 + tmp33; + tmp218 = tmp32 - tmp33; + { + fftw_real tmp159; + fftw_real tmp160; + fftw_real tmp35; + fftw_real tmp36; + ASSERT_ALIGNED_DOUBLE; + tmp159 = c_im(input[2 * istride]); + tmp160 = c_im(input[34 * istride]); + tmp161 = tmp159 + tmp160; + tmp230 = tmp159 - tmp160; + tmp35 = c_re(input[18 * istride]); + tmp36 = c_re(input[50 * istride]); + tmp37 = tmp35 + tmp36; 
+ tmp229 = tmp35 - tmp36; + } + tmp162 = c_im(input[18 * istride]); + tmp163 = c_im(input[50 * istride]); + tmp164 = tmp162 + tmp163; + tmp219 = tmp162 - tmp163; + { + fftw_real tmp42; + fftw_real tmp43; + fftw_real tmp224; + fftw_real tmp169; + fftw_real tmp170; + fftw_real tmp225; + ASSERT_ALIGNED_DOUBLE; + tmp42 = c_re(input[58 * istride]); + tmp43 = c_re(input[26 * istride]); + tmp224 = tmp42 - tmp43; + tmp169 = c_im(input[58 * istride]); + tmp170 = c_im(input[26 * istride]); + tmp225 = tmp169 - tmp170; + tmp44 = tmp42 + tmp43; + tmp233 = tmp225 - tmp224; + tmp226 = tmp224 + tmp225; + tmp171 = tmp169 + tmp170; + } + { + fftw_real tmp39; + fftw_real tmp40; + fftw_real tmp221; + fftw_real tmp166; + fftw_real tmp167; + fftw_real tmp222; + ASSERT_ALIGNED_DOUBLE; + tmp39 = c_re(input[10 * istride]); + tmp40 = c_re(input[42 * istride]); + tmp221 = tmp39 - tmp40; + tmp166 = c_im(input[10 * istride]); + tmp167 = c_im(input[42 * istride]); + tmp222 = tmp166 - tmp167; + tmp41 = tmp39 + tmp40; + tmp232 = tmp221 + tmp222; + tmp223 = tmp221 - tmp222; + tmp168 = tmp166 + tmp167; + } + } + { + fftw_real tmp38; + fftw_real tmp45; + fftw_real tmp646; + fftw_real tmp647; + ASSERT_ALIGNED_DOUBLE; + tmp38 = tmp34 + tmp37; + tmp45 = tmp41 + tmp44; + tmp46 = tmp38 + tmp45; + tmp856 = tmp38 - tmp45; + tmp646 = tmp34 - tmp37; + tmp647 = tmp171 - tmp168; + tmp648 = tmp646 + tmp647; + tmp758 = tmp646 - tmp647; + } + { + fftw_real tmp165; + fftw_real tmp172; + fftw_real tmp649; + fftw_real tmp650; + ASSERT_ALIGNED_DOUBLE; + tmp165 = tmp161 + tmp164; + tmp172 = tmp168 + tmp171; + tmp173 = tmp165 + tmp172; + tmp857 = tmp165 - tmp172; + tmp649 = tmp41 - tmp44; + tmp650 = tmp161 - tmp164; + tmp651 = tmp649 + tmp650; + tmp759 = tmp650 - tmp649; + } + { + fftw_real tmp220; + fftw_real tmp227; + fftw_real tmp482; + fftw_real tmp483; + ASSERT_ALIGNED_DOUBLE; + tmp220 = tmp218 - tmp219; + tmp227 = K707106781 * (tmp223 + tmp226); + tmp228 = tmp220 - tmp227; + tmp414 = tmp220 + tmp227; + tmp482 = 
tmp218 + tmp219; + tmp483 = K707106781 * (tmp233 - tmp232); + tmp484 = tmp482 - tmp483; + tmp578 = tmp482 + tmp483; + } + { + fftw_real tmp231; + fftw_real tmp234; + fftw_real tmp485; + fftw_real tmp486; + ASSERT_ALIGNED_DOUBLE; + tmp231 = tmp229 + tmp230; + tmp234 = K707106781 * (tmp232 + tmp233); + tmp235 = tmp231 - tmp234; + tmp415 = tmp231 + tmp234; + tmp485 = tmp230 - tmp229; + tmp486 = K707106781 * (tmp223 - tmp226); + tmp487 = tmp485 - tmp486; + tmp579 = tmp485 + tmp486; + } + } + { + fftw_real tmp66; + fftw_real tmp258; + fftw_real tmp302; + fftw_real tmp680; + fftw_real tmp69; + fftw_real tmp299; + fftw_real tmp261; + fftw_real tmp681; + fftw_real tmp76; + fftw_real tmp663; + fftw_real tmp272; + fftw_real tmp305; + fftw_real tmp73; + fftw_real tmp664; + fftw_real tmp267; + fftw_real tmp304; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp64; + fftw_real tmp65; + fftw_real tmp259; + fftw_real tmp260; + ASSERT_ALIGNED_DOUBLE; + tmp64 = c_re(input[istride]); + tmp65 = c_re(input[33 * istride]); + tmp66 = tmp64 + tmp65; + tmp258 = tmp64 - tmp65; + { + fftw_real tmp300; + fftw_real tmp301; + fftw_real tmp67; + fftw_real tmp68; + ASSERT_ALIGNED_DOUBLE; + tmp300 = c_im(input[istride]); + tmp301 = c_im(input[33 * istride]); + tmp302 = tmp300 - tmp301; + tmp680 = tmp300 + tmp301; + tmp67 = c_re(input[17 * istride]); + tmp68 = c_re(input[49 * istride]); + tmp69 = tmp67 + tmp68; + tmp299 = tmp67 - tmp68; + } + tmp259 = c_im(input[17 * istride]); + tmp260 = c_im(input[49 * istride]); + tmp261 = tmp259 - tmp260; + tmp681 = tmp259 + tmp260; + { + fftw_real tmp74; + fftw_real tmp75; + fftw_real tmp268; + fftw_real tmp269; + fftw_real tmp270; + fftw_real tmp271; + ASSERT_ALIGNED_DOUBLE; + tmp74 = c_re(input[57 * istride]); + tmp75 = c_re(input[25 * istride]); + tmp268 = tmp74 - tmp75; + tmp269 = c_im(input[57 * istride]); + tmp270 = c_im(input[25 * istride]); + tmp271 = tmp269 - tmp270; + tmp76 = tmp74 + tmp75; + tmp663 = tmp269 + tmp270; + tmp272 = tmp268 + tmp271; + tmp305 
= tmp271 - tmp268; + } + { + fftw_real tmp71; + fftw_real tmp72; + fftw_real tmp263; + fftw_real tmp264; + fftw_real tmp265; + fftw_real tmp266; + ASSERT_ALIGNED_DOUBLE; + tmp71 = c_re(input[9 * istride]); + tmp72 = c_re(input[41 * istride]); + tmp263 = tmp71 - tmp72; + tmp264 = c_im(input[9 * istride]); + tmp265 = c_im(input[41 * istride]); + tmp266 = tmp264 - tmp265; + tmp73 = tmp71 + tmp72; + tmp664 = tmp264 + tmp265; + tmp267 = tmp263 - tmp266; + tmp304 = tmp263 + tmp266; + } + } + { + fftw_real tmp70; + fftw_real tmp77; + fftw_real tmp662; + fftw_real tmp665; + ASSERT_ALIGNED_DOUBLE; + tmp70 = tmp66 + tmp69; + tmp77 = tmp73 + tmp76; + tmp78 = tmp70 + tmp77; + tmp867 = tmp70 - tmp77; + tmp662 = tmp66 - tmp69; + tmp665 = tmp663 - tmp664; + tmp666 = tmp662 + tmp665; + tmp769 = tmp662 - tmp665; + } + { + fftw_real tmp819; + fftw_real tmp820; + fftw_real tmp679; + fftw_real tmp682; + ASSERT_ALIGNED_DOUBLE; + tmp819 = tmp680 + tmp681; + tmp820 = tmp664 + tmp663; + tmp821 = tmp819 + tmp820; + tmp864 = tmp819 - tmp820; + tmp679 = tmp73 - tmp76; + tmp682 = tmp680 - tmp681; + tmp683 = tmp679 + tmp682; + tmp766 = tmp682 - tmp679; + } + { + fftw_real tmp262; + fftw_real tmp273; + fftw_real tmp498; + fftw_real tmp499; + ASSERT_ALIGNED_DOUBLE; + tmp262 = tmp258 - tmp261; + tmp273 = K707106781 * (tmp267 + tmp272); + tmp274 = tmp262 - tmp273; + tmp425 = tmp262 + tmp273; + tmp498 = tmp258 + tmp261; + tmp499 = K707106781 * (tmp305 - tmp304); + tmp500 = tmp498 - tmp499; + tmp589 = tmp498 + tmp499; + } + { + fftw_real tmp303; + fftw_real tmp306; + fftw_real tmp509; + fftw_real tmp510; + ASSERT_ALIGNED_DOUBLE; + tmp303 = tmp299 + tmp302; + tmp306 = K707106781 * (tmp304 + tmp305); + tmp307 = tmp303 - tmp306; + tmp422 = tmp303 + tmp306; + tmp509 = tmp302 - tmp299; + tmp510 = K707106781 * (tmp267 - tmp272); + tmp511 = tmp509 - tmp510; + tmp586 = tmp509 + tmp510; + } + } + { + fftw_real tmp49; + fftw_real tmp237; + fftw_real tmp176; + fftw_real tmp249; + fftw_real tmp52; + fftw_real 
tmp248; + fftw_real tmp179; + fftw_real tmp238; + fftw_real tmp59; + fftw_real tmp252; + fftw_real tmp245; + fftw_real tmp186; + fftw_real tmp56; + fftw_real tmp251; + fftw_real tmp242; + fftw_real tmp183; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp47; + fftw_real tmp48; + fftw_real tmp177; + fftw_real tmp178; + ASSERT_ALIGNED_DOUBLE; + tmp47 = c_re(input[62 * istride]); + tmp48 = c_re(input[30 * istride]); + tmp49 = tmp47 + tmp48; + tmp237 = tmp47 - tmp48; + { + fftw_real tmp174; + fftw_real tmp175; + fftw_real tmp50; + fftw_real tmp51; + ASSERT_ALIGNED_DOUBLE; + tmp174 = c_im(input[62 * istride]); + tmp175 = c_im(input[30 * istride]); + tmp176 = tmp174 + tmp175; + tmp249 = tmp174 - tmp175; + tmp50 = c_re(input[14 * istride]); + tmp51 = c_re(input[46 * istride]); + tmp52 = tmp50 + tmp51; + tmp248 = tmp50 - tmp51; + } + tmp177 = c_im(input[14 * istride]); + tmp178 = c_im(input[46 * istride]); + tmp179 = tmp177 + tmp178; + tmp238 = tmp177 - tmp178; + { + fftw_real tmp57; + fftw_real tmp58; + fftw_real tmp243; + fftw_real tmp184; + fftw_real tmp185; + fftw_real tmp244; + ASSERT_ALIGNED_DOUBLE; + tmp57 = c_re(input[54 * istride]); + tmp58 = c_re(input[22 * istride]); + tmp243 = tmp57 - tmp58; + tmp184 = c_im(input[54 * istride]); + tmp185 = c_im(input[22 * istride]); + tmp244 = tmp184 - tmp185; + tmp59 = tmp57 + tmp58; + tmp252 = tmp244 - tmp243; + tmp245 = tmp243 + tmp244; + tmp186 = tmp184 + tmp185; + } + { + fftw_real tmp54; + fftw_real tmp55; + fftw_real tmp240; + fftw_real tmp181; + fftw_real tmp182; + fftw_real tmp241; + ASSERT_ALIGNED_DOUBLE; + tmp54 = c_re(input[6 * istride]); + tmp55 = c_re(input[38 * istride]); + tmp240 = tmp54 - tmp55; + tmp181 = c_im(input[6 * istride]); + tmp182 = c_im(input[38 * istride]); + tmp241 = tmp181 - tmp182; + tmp56 = tmp54 + tmp55; + tmp251 = tmp240 + tmp241; + tmp242 = tmp240 - tmp241; + tmp183 = tmp181 + tmp182; + } + } + { + fftw_real tmp53; + fftw_real tmp60; + fftw_real tmp653; + fftw_real tmp654; + ASSERT_ALIGNED_DOUBLE; 
+ tmp53 = tmp49 + tmp52; + tmp60 = tmp56 + tmp59; + tmp61 = tmp53 + tmp60; + tmp859 = tmp53 - tmp60; + tmp653 = tmp56 - tmp59; + tmp654 = tmp176 - tmp179; + tmp655 = tmp653 + tmp654; + tmp761 = tmp654 - tmp653; + } + { + fftw_real tmp180; + fftw_real tmp187; + fftw_real tmp656; + fftw_real tmp657; + ASSERT_ALIGNED_DOUBLE; + tmp180 = tmp176 + tmp179; + tmp187 = tmp183 + tmp186; + tmp188 = tmp180 + tmp187; + tmp860 = tmp180 - tmp187; + tmp656 = tmp49 - tmp52; + tmp657 = tmp186 - tmp183; + tmp658 = tmp656 + tmp657; + tmp762 = tmp656 - tmp657; + } + { + fftw_real tmp239; + fftw_real tmp246; + fftw_real tmp489; + fftw_real tmp490; + ASSERT_ALIGNED_DOUBLE; + tmp239 = tmp237 - tmp238; + tmp246 = K707106781 * (tmp242 + tmp245); + tmp247 = tmp239 - tmp246; + tmp417 = tmp239 + tmp246; + tmp489 = tmp249 - tmp248; + tmp490 = K707106781 * (tmp242 - tmp245); + tmp491 = tmp489 - tmp490; + tmp581 = tmp489 + tmp490; + } + { + fftw_real tmp250; + fftw_real tmp253; + fftw_real tmp492; + fftw_real tmp493; + ASSERT_ALIGNED_DOUBLE; + tmp250 = tmp248 + tmp249; + tmp253 = K707106781 * (tmp251 + tmp252); + tmp254 = tmp250 - tmp253; + tmp418 = tmp250 + tmp253; + tmp492 = tmp237 + tmp238; + tmp493 = K707106781 * (tmp252 - tmp251); + tmp494 = tmp492 - tmp493; + tmp582 = tmp492 + tmp493; + } + } + { + fftw_real tmp81; + fftw_real tmp668; + fftw_real tmp84; + fftw_real tmp669; + fftw_real tmp290; + fftw_real tmp504; + fftw_real tmp295; + fftw_real tmp505; + fftw_real tmp670; + fftw_real tmp667; + fftw_real tmp88; + fftw_real tmp673; + fftw_real tmp91; + fftw_real tmp674; + fftw_real tmp279; + fftw_real tmp501; + fftw_real tmp284; + fftw_real tmp502; + fftw_real tmp675; + fftw_real tmp672; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp291; + fftw_real tmp289; + fftw_real tmp286; + fftw_real tmp294; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp79; + fftw_real tmp80; + fftw_real tmp287; + fftw_real tmp288; + ASSERT_ALIGNED_DOUBLE; + tmp79 = c_re(input[5 * istride]); + tmp80 = c_re(input[37 * 
istride]); + tmp81 = tmp79 + tmp80; + tmp291 = tmp79 - tmp80; + tmp287 = c_im(input[5 * istride]); + tmp288 = c_im(input[37 * istride]); + tmp289 = tmp287 - tmp288; + tmp668 = tmp287 + tmp288; + } + { + fftw_real tmp82; + fftw_real tmp83; + fftw_real tmp292; + fftw_real tmp293; + ASSERT_ALIGNED_DOUBLE; + tmp82 = c_re(input[21 * istride]); + tmp83 = c_re(input[53 * istride]); + tmp84 = tmp82 + tmp83; + tmp286 = tmp82 - tmp83; + tmp292 = c_im(input[21 * istride]); + tmp293 = c_im(input[53 * istride]); + tmp294 = tmp292 - tmp293; + tmp669 = tmp292 + tmp293; + } + tmp290 = tmp286 + tmp289; + tmp504 = tmp289 - tmp286; + tmp295 = tmp291 - tmp294; + tmp505 = tmp291 + tmp294; + tmp670 = tmp668 - tmp669; + tmp667 = tmp81 - tmp84; + } + { + fftw_real tmp280; + fftw_real tmp278; + fftw_real tmp275; + fftw_real tmp283; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp86; + fftw_real tmp87; + fftw_real tmp276; + fftw_real tmp277; + ASSERT_ALIGNED_DOUBLE; + tmp86 = c_re(input[61 * istride]); + tmp87 = c_re(input[29 * istride]); + tmp88 = tmp86 + tmp87; + tmp280 = tmp86 - tmp87; + tmp276 = c_im(input[61 * istride]); + tmp277 = c_im(input[29 * istride]); + tmp278 = tmp276 - tmp277; + tmp673 = tmp276 + tmp277; + } + { + fftw_real tmp89; + fftw_real tmp90; + fftw_real tmp281; + fftw_real tmp282; + ASSERT_ALIGNED_DOUBLE; + tmp89 = c_re(input[13 * istride]); + tmp90 = c_re(input[45 * istride]); + tmp91 = tmp89 + tmp90; + tmp275 = tmp89 - tmp90; + tmp281 = c_im(input[13 * istride]); + tmp282 = c_im(input[45 * istride]); + tmp283 = tmp281 - tmp282; + tmp674 = tmp281 + tmp282; + } + tmp279 = tmp275 + tmp278; + tmp501 = tmp278 - tmp275; + tmp284 = tmp280 - tmp283; + tmp502 = tmp280 + tmp283; + tmp675 = tmp673 - tmp674; + tmp672 = tmp88 - tmp91; + } + { + fftw_real tmp85; + fftw_real tmp92; + fftw_real tmp308; + fftw_real tmp309; + ASSERT_ALIGNED_DOUBLE; + tmp85 = tmp81 + tmp84; + tmp92 = tmp88 + tmp91; + tmp93 = tmp85 + tmp92; + tmp865 = tmp85 - tmp92; + tmp308 = (K923879532 * tmp295) - 
(K382683432 * tmp290); + tmp309 = (K382683432 * tmp279) + (K923879532 * tmp284); + tmp310 = tmp308 - tmp309; + tmp426 = tmp308 + tmp309; + } + { + fftw_real tmp285; + fftw_real tmp296; + fftw_real tmp822; + fftw_real tmp823; + ASSERT_ALIGNED_DOUBLE; + tmp285 = (K923879532 * tmp279) - (K382683432 * tmp284); + tmp296 = (K923879532 * tmp290) + (K382683432 * tmp295); + tmp297 = tmp285 - tmp296; + tmp423 = tmp296 + tmp285; + tmp822 = tmp668 + tmp669; + tmp823 = tmp673 + tmp674; + tmp824 = tmp822 + tmp823; + tmp868 = tmp823 - tmp822; + } + { + fftw_real tmp503; + fftw_real tmp506; + fftw_real tmp512; + fftw_real tmp513; + ASSERT_ALIGNED_DOUBLE; + tmp503 = (K382683432 * tmp501) - (K923879532 * tmp502); + tmp506 = (K382683432 * tmp504) + (K923879532 * tmp505); + tmp507 = tmp503 - tmp506; + tmp587 = tmp506 + tmp503; + tmp512 = (K382683432 * tmp505) - (K923879532 * tmp504); + tmp513 = (K923879532 * tmp501) + (K382683432 * tmp502); + tmp514 = tmp512 - tmp513; + tmp590 = tmp512 + tmp513; + } + { + fftw_real tmp671; + fftw_real tmp676; + fftw_real tmp684; + fftw_real tmp685; + ASSERT_ALIGNED_DOUBLE; + tmp671 = tmp667 - tmp670; + tmp676 = tmp672 + tmp675; + tmp677 = K707106781 * (tmp671 + tmp676); + tmp767 = K707106781 * (tmp671 - tmp676); + tmp684 = tmp667 + tmp670; + tmp685 = tmp675 - tmp672; + tmp686 = K707106781 * (tmp684 + tmp685); + tmp770 = K707106781 * (tmp685 - tmp684); + } + } + { + fftw_real tmp63; + fftw_real tmp851; + fftw_real tmp850; + fftw_real tmp852; + fftw_real tmp126; + fftw_real tmp127; + fftw_real tmp190; + fftw_real tmp847; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp31; + fftw_real tmp62; + fftw_real tmp848; + fftw_real tmp849; + ASSERT_ALIGNED_DOUBLE; + tmp31 = tmp15 + tmp30; + tmp62 = tmp46 + tmp61; + tmp63 = tmp31 + tmp62; + tmp851 = tmp31 - tmp62; + tmp848 = tmp821 + tmp824; + tmp849 = tmp830 + tmp833; + tmp850 = tmp848 + tmp849; + tmp852 = tmp849 - tmp848; + } + { + fftw_real tmp94; + fftw_real tmp125; + fftw_real tmp158; + fftw_real tmp189; + 
ASSERT_ALIGNED_DOUBLE; + tmp94 = tmp78 + tmp93; + tmp125 = tmp109 + tmp124; + tmp126 = tmp94 + tmp125; + tmp127 = tmp94 - tmp125; + tmp158 = tmp142 + tmp157; + tmp189 = tmp173 + tmp188; + tmp190 = tmp158 - tmp189; + tmp847 = tmp158 + tmp189; + } + c_re(output[32 * ostride]) = tmp63 - tmp126; + c_re(output[0]) = tmp63 + tmp126; + c_im(output[16 * ostride]) = tmp127 + tmp190; + c_im(output[48 * ostride]) = tmp190 - tmp127; + c_im(output[32 * ostride]) = tmp847 - tmp850; + c_im(output[0]) = tmp847 + tmp850; + c_re(output[48 * ostride]) = tmp851 - tmp852; + c_re(output[16 * ostride]) = tmp851 + tmp852; + } + { + fftw_real tmp817; + fftw_real tmp841; + fftw_real tmp839; + fftw_real tmp845; + fftw_real tmp826; + fftw_real tmp842; + fftw_real tmp835; + fftw_real tmp843; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp815; + fftw_real tmp816; + fftw_real tmp837; + fftw_real tmp838; + ASSERT_ALIGNED_DOUBLE; + tmp815 = tmp142 - tmp157; + tmp816 = tmp46 - tmp61; + tmp817 = tmp815 - tmp816; + tmp841 = tmp816 + tmp815; + tmp837 = tmp15 - tmp30; + tmp838 = tmp188 - tmp173; + tmp839 = tmp837 + tmp838; + tmp845 = tmp837 - tmp838; + } + { + fftw_real tmp818; + fftw_real tmp825; + fftw_real tmp827; + fftw_real tmp834; + ASSERT_ALIGNED_DOUBLE; + tmp818 = tmp78 - tmp93; + tmp825 = tmp821 - tmp824; + tmp826 = tmp818 - tmp825; + tmp842 = tmp818 + tmp825; + tmp827 = tmp109 - tmp124; + tmp834 = tmp830 - tmp833; + tmp835 = tmp827 + tmp834; + tmp843 = tmp834 - tmp827; + } + { + fftw_real tmp836; + fftw_real tmp840; + fftw_real tmp844; + fftw_real tmp846; + ASSERT_ALIGNED_DOUBLE; + tmp836 = K707106781 * (tmp826 - tmp835); + c_im(output[56 * ostride]) = tmp817 - tmp836; + c_im(output[24 * ostride]) = tmp817 + tmp836; + tmp840 = K707106781 * (tmp826 + tmp835); + c_re(output[40 * ostride]) = tmp839 - tmp840; + c_re(output[8 * ostride]) = tmp839 + tmp840; + tmp844 = K707106781 * (tmp842 + tmp843); + c_im(output[40 * ostride]) = tmp841 - tmp844; + c_im(output[8 * ostride]) = tmp841 + tmp844; + tmp846 
= K707106781 * (tmp843 - tmp842); + c_re(output[56 * ostride]) = tmp845 - tmp846; + c_re(output[24 * ostride]) = tmp845 + tmp846; + } + } + { + fftw_real tmp217; + fftw_real tmp391; + fftw_real tmp396; + fftw_real tmp406; + fftw_real tmp399; + fftw_real tmp407; + fftw_real tmp367; + fftw_real tmp387; + fftw_real tmp312; + fftw_real tmp386; + fftw_real tmp379; + fftw_real tmp401; + fftw_real tmp382; + fftw_real tmp392; + fftw_real tmp256; + fftw_real tmp402; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp201; + fftw_real tmp216; + fftw_real tmp394; + fftw_real tmp395; + ASSERT_ALIGNED_DOUBLE; + tmp201 = tmp193 - tmp200; + tmp216 = tmp208 - tmp215; + tmp217 = tmp201 - tmp216; + tmp391 = tmp201 + tmp216; + tmp394 = tmp274 + tmp297; + tmp395 = tmp307 + tmp310; + tmp396 = (K881921264 * tmp394) - (K471396736 * tmp395); + tmp406 = (K471396736 * tmp394) + (K881921264 * tmp395); + } + { + fftw_real tmp397; + fftw_real tmp398; + fftw_real tmp353; + fftw_real tmp366; + ASSERT_ALIGNED_DOUBLE; + tmp397 = tmp329 + tmp352; + tmp398 = tmp362 + tmp365; + tmp399 = (K881921264 * tmp397) + (K471396736 * tmp398); + tmp407 = (K881921264 * tmp398) - (K471396736 * tmp397); + tmp353 = tmp329 - tmp352; + tmp366 = tmp362 - tmp365; + tmp367 = (K290284677 * tmp353) + (K956940335 * tmp366); + tmp387 = (K290284677 * tmp366) - (K956940335 * tmp353); + } + { + fftw_real tmp298; + fftw_real tmp311; + fftw_real tmp375; + fftw_real tmp378; + ASSERT_ALIGNED_DOUBLE; + tmp298 = tmp274 - tmp297; + tmp311 = tmp307 - tmp310; + tmp312 = (K290284677 * tmp298) - (K956940335 * tmp311); + tmp386 = (K956940335 * tmp298) + (K290284677 * tmp311); + tmp375 = tmp371 - tmp374; + tmp378 = tmp376 - tmp377; + tmp379 = tmp375 - tmp378; + tmp401 = tmp375 + tmp378; + } + { + fftw_real tmp380; + fftw_real tmp381; + fftw_real tmp236; + fftw_real tmp255; + ASSERT_ALIGNED_DOUBLE; + tmp380 = (K555570233 * tmp254) - (K831469612 * tmp247); + tmp381 = (K831469612 * tmp228) + (K555570233 * tmp235); + tmp382 = tmp380 - tmp381; + tmp392 
= tmp381 + tmp380; + tmp236 = (K555570233 * tmp228) - (K831469612 * tmp235); + tmp255 = (K555570233 * tmp247) + (K831469612 * tmp254); + tmp256 = tmp236 - tmp255; + tmp402 = tmp236 + tmp255; + } + { + fftw_real tmp257; + fftw_real tmp368; + fftw_real tmp383; + fftw_real tmp384; + ASSERT_ALIGNED_DOUBLE; + tmp257 = tmp217 - tmp256; + tmp368 = tmp312 - tmp367; + c_im(output[61 * ostride]) = tmp257 - tmp368; + c_im(output[29 * ostride]) = tmp257 + tmp368; + tmp383 = tmp379 + tmp382; + tmp384 = tmp312 + tmp367; + c_re(output[45 * ostride]) = tmp383 - tmp384; + c_re(output[13 * ostride]) = tmp383 + tmp384; + } + { + fftw_real tmp389; + fftw_real tmp390; + fftw_real tmp385; + fftw_real tmp388; + ASSERT_ALIGNED_DOUBLE; + tmp389 = tmp379 - tmp382; + tmp390 = tmp387 - tmp386; + c_re(output[61 * ostride]) = tmp389 - tmp390; + c_re(output[29 * ostride]) = tmp389 + tmp390; + tmp385 = tmp217 + tmp256; + tmp388 = tmp386 + tmp387; + c_im(output[45 * ostride]) = tmp385 - tmp388; + c_im(output[13 * ostride]) = tmp385 + tmp388; + } + { + fftw_real tmp393; + fftw_real tmp400; + fftw_real tmp403; + fftw_real tmp404; + ASSERT_ALIGNED_DOUBLE; + tmp393 = tmp391 - tmp392; + tmp400 = tmp396 - tmp399; + c_im(output[53 * ostride]) = tmp393 - tmp400; + c_im(output[21 * ostride]) = tmp393 + tmp400; + tmp403 = tmp401 + tmp402; + tmp404 = tmp396 + tmp399; + c_re(output[37 * ostride]) = tmp403 - tmp404; + c_re(output[5 * ostride]) = tmp403 + tmp404; + } + { + fftw_real tmp409; + fftw_real tmp410; + fftw_real tmp405; + fftw_real tmp408; + ASSERT_ALIGNED_DOUBLE; + tmp409 = tmp401 - tmp402; + tmp410 = tmp407 - tmp406; + c_re(output[53 * ostride]) = tmp409 - tmp410; + c_re(output[21 * ostride]) = tmp409 + tmp410; + tmp405 = tmp391 + tmp392; + tmp408 = tmp406 + tmp407; + c_im(output[37 * ostride]) = tmp405 - tmp408; + c_im(output[5 * ostride]) = tmp405 + tmp408; + } + } + { + fftw_real tmp413; + fftw_real tmp451; + fftw_real tmp456; + fftw_real tmp466; + fftw_real tmp459; + fftw_real tmp467; + 
fftw_real tmp435; + fftw_real tmp447; + fftw_real tmp428; + fftw_real tmp446; + fftw_real tmp439; + fftw_real tmp461; + fftw_real tmp442; + fftw_real tmp452; + fftw_real tmp420; + fftw_real tmp462; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp411; + fftw_real tmp412; + fftw_real tmp454; + fftw_real tmp455; + ASSERT_ALIGNED_DOUBLE; + tmp411 = tmp193 + tmp200; + tmp412 = tmp377 + tmp376; + tmp413 = tmp411 - tmp412; + tmp451 = tmp411 + tmp412; + tmp454 = tmp422 + tmp423; + tmp455 = tmp425 + tmp426; + tmp456 = (K995184726 * tmp454) + (K098017140 * tmp455); + tmp466 = (K995184726 * tmp455) - (K098017140 * tmp454); + } + { + fftw_real tmp457; + fftw_real tmp458; + fftw_real tmp431; + fftw_real tmp434; + ASSERT_ALIGNED_DOUBLE; + tmp457 = tmp429 + tmp430; + tmp458 = tmp432 + tmp433; + tmp459 = (K995184726 * tmp457) - (K098017140 * tmp458); + tmp467 = (K098017140 * tmp457) + (K995184726 * tmp458); + tmp431 = tmp429 - tmp430; + tmp434 = tmp432 - tmp433; + tmp435 = (K634393284 * tmp431) - (K773010453 * tmp434); + tmp447 = (K773010453 * tmp431) + (K634393284 * tmp434); + } + { + fftw_real tmp424; + fftw_real tmp427; + fftw_real tmp437; + fftw_real tmp438; + ASSERT_ALIGNED_DOUBLE; + tmp424 = tmp422 - tmp423; + tmp427 = tmp425 - tmp426; + tmp428 = (K634393284 * tmp424) + (K773010453 * tmp427); + tmp446 = (K634393284 * tmp427) - (K773010453 * tmp424); + tmp437 = tmp371 + tmp374; + tmp438 = tmp208 + tmp215; + tmp439 = tmp437 - tmp438; + tmp461 = tmp437 + tmp438; + } + { + fftw_real tmp440; + fftw_real tmp441; + fftw_real tmp416; + fftw_real tmp419; + ASSERT_ALIGNED_DOUBLE; + tmp440 = (K980785280 * tmp418) - (K195090322 * tmp417); + tmp441 = (K195090322 * tmp414) + (K980785280 * tmp415); + tmp442 = tmp440 - tmp441; + tmp452 = tmp441 + tmp440; + tmp416 = (K980785280 * tmp414) - (K195090322 * tmp415); + tmp419 = (K980785280 * tmp417) + (K195090322 * tmp418); + tmp420 = tmp416 - tmp419; + tmp462 = tmp416 + tmp419; + } + { + fftw_real tmp421; + fftw_real tmp436; + fftw_real tmp443; + 
fftw_real tmp444; + ASSERT_ALIGNED_DOUBLE; + tmp421 = tmp413 + tmp420; + tmp436 = tmp428 + tmp435; + c_im(output[41 * ostride]) = tmp421 - tmp436; + c_im(output[9 * ostride]) = tmp421 + tmp436; + tmp443 = tmp439 - tmp442; + tmp444 = tmp435 - tmp428; + c_re(output[57 * ostride]) = tmp443 - tmp444; + c_re(output[25 * ostride]) = tmp443 + tmp444; + } + { + fftw_real tmp449; + fftw_real tmp450; + fftw_real tmp445; + fftw_real tmp448; + ASSERT_ALIGNED_DOUBLE; + tmp449 = tmp439 + tmp442; + tmp450 = tmp446 + tmp447; + c_re(output[41 * ostride]) = tmp449 - tmp450; + c_re(output[9 * ostride]) = tmp449 + tmp450; + tmp445 = tmp413 - tmp420; + tmp448 = tmp446 - tmp447; + c_im(output[57 * ostride]) = tmp445 - tmp448; + c_im(output[25 * ostride]) = tmp445 + tmp448; + } + { + fftw_real tmp453; + fftw_real tmp460; + fftw_real tmp463; + fftw_real tmp464; + ASSERT_ALIGNED_DOUBLE; + tmp453 = tmp451 + tmp452; + tmp460 = tmp456 + tmp459; + c_im(output[33 * ostride]) = tmp453 - tmp460; + c_im(output[ostride]) = tmp453 + tmp460; + tmp463 = tmp461 - tmp462; + tmp464 = tmp459 - tmp456; + c_re(output[49 * ostride]) = tmp463 - tmp464; + c_re(output[17 * ostride]) = tmp463 + tmp464; + } + { + fftw_real tmp469; + fftw_real tmp470; + fftw_real tmp465; + fftw_real tmp468; + ASSERT_ALIGNED_DOUBLE; + tmp469 = tmp461 + tmp462; + tmp470 = tmp466 + tmp467; + c_re(output[33 * ostride]) = tmp469 - tmp470; + c_re(output[ostride]) = tmp469 + tmp470; + tmp465 = tmp451 - tmp452; + tmp468 = tmp466 - tmp467; + c_im(output[49 * ostride]) = tmp465 - tmp468; + c_im(output[17 * ostride]) = tmp465 + tmp468; + } + } + { + fftw_real tmp855; + fftw_real tmp893; + fftw_real tmp862; + fftw_real tmp904; + fftw_real tmp884; + fftw_real tmp894; + fftw_real tmp870; + fftw_real tmp888; + fftw_real tmp881; + fftw_real tmp903; + fftw_real tmp898; + fftw_real tmp908; + fftw_real tmp877; + fftw_real tmp889; + fftw_real tmp901; + fftw_real tmp909; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp858; + fftw_real tmp861; + fftw_real 
tmp896; + fftw_real tmp897; + ASSERT_ALIGNED_DOUBLE; + tmp855 = tmp853 - tmp854; + tmp893 = tmp854 + tmp853; + tmp858 = tmp856 - tmp857; + tmp861 = tmp859 + tmp860; + tmp862 = K707106781 * (tmp858 - tmp861); + tmp904 = K707106781 * (tmp858 + tmp861); + { + fftw_real tmp882; + fftw_real tmp883; + fftw_real tmp866; + fftw_real tmp869; + ASSERT_ALIGNED_DOUBLE; + tmp882 = tmp860 - tmp859; + tmp883 = tmp856 + tmp857; + tmp884 = K707106781 * (tmp882 - tmp883); + tmp894 = K707106781 * (tmp883 + tmp882); + tmp866 = tmp864 - tmp865; + tmp869 = tmp867 - tmp868; + tmp870 = (K382683432 * tmp866) + (K923879532 * tmp869); + tmp888 = (K382683432 * tmp869) - (K923879532 * tmp866); + } + tmp881 = tmp879 - tmp880; + tmp903 = tmp879 + tmp880; + tmp896 = tmp865 + tmp864; + tmp897 = tmp867 + tmp868; + tmp898 = (K923879532 * tmp896) + (K382683432 * tmp897); + tmp908 = (K923879532 * tmp897) - (K382683432 * tmp896); + { + fftw_real tmp873; + fftw_real tmp876; + fftw_real tmp899; + fftw_real tmp900; + ASSERT_ALIGNED_DOUBLE; + tmp873 = tmp871 - tmp872; + tmp876 = tmp874 - tmp875; + tmp877 = (K382683432 * tmp873) - (K923879532 * tmp876); + tmp889 = (K923879532 * tmp873) + (K382683432 * tmp876); + tmp899 = tmp872 + tmp871; + tmp900 = tmp874 + tmp875; + tmp901 = (K923879532 * tmp899) - (K382683432 * tmp900); + tmp909 = (K382683432 * tmp899) + (K923879532 * tmp900); + } + } + { + fftw_real tmp863; + fftw_real tmp878; + fftw_real tmp885; + fftw_real tmp886; + ASSERT_ALIGNED_DOUBLE; + tmp863 = tmp855 + tmp862; + tmp878 = tmp870 + tmp877; + c_im(output[44 * ostride]) = tmp863 - tmp878; + c_im(output[12 * ostride]) = tmp863 + tmp878; + tmp885 = tmp881 - tmp884; + tmp886 = tmp877 - tmp870; + c_re(output[60 * ostride]) = tmp885 - tmp886; + c_re(output[28 * ostride]) = tmp885 + tmp886; + } + { + fftw_real tmp891; + fftw_real tmp892; + fftw_real tmp887; + fftw_real tmp890; + ASSERT_ALIGNED_DOUBLE; + tmp891 = tmp881 + tmp884; + tmp892 = tmp888 + tmp889; + c_re(output[44 * ostride]) = tmp891 - tmp892; + 
c_re(output[12 * ostride]) = tmp891 + tmp892; + tmp887 = tmp855 - tmp862; + tmp890 = tmp888 - tmp889; + c_im(output[60 * ostride]) = tmp887 - tmp890; + c_im(output[28 * ostride]) = tmp887 + tmp890; + } + { + fftw_real tmp895; + fftw_real tmp902; + fftw_real tmp905; + fftw_real tmp906; + ASSERT_ALIGNED_DOUBLE; + tmp895 = tmp893 + tmp894; + tmp902 = tmp898 + tmp901; + c_im(output[36 * ostride]) = tmp895 - tmp902; + c_im(output[4 * ostride]) = tmp895 + tmp902; + tmp905 = tmp903 - tmp904; + tmp906 = tmp901 - tmp898; + c_re(output[52 * ostride]) = tmp905 - tmp906; + c_re(output[20 * ostride]) = tmp905 + tmp906; + } + { + fftw_real tmp911; + fftw_real tmp912; + fftw_real tmp907; + fftw_real tmp910; + ASSERT_ALIGNED_DOUBLE; + tmp911 = tmp903 + tmp904; + tmp912 = tmp908 + tmp909; + c_re(output[36 * ostride]) = tmp911 - tmp912; + c_re(output[4 * ostride]) = tmp911 + tmp912; + tmp907 = tmp893 - tmp894; + tmp910 = tmp908 - tmp909; + c_im(output[52 * ostride]) = tmp907 - tmp910; + c_im(output[20 * ostride]) = tmp907 + tmp910; + } + } + { + fftw_real tmp757; + fftw_real tmp795; + fftw_real tmp800; + fftw_real tmp810; + fftw_real tmp803; + fftw_real tmp811; + fftw_real tmp779; + fftw_real tmp791; + fftw_real tmp783; + fftw_real tmp805; + fftw_real tmp764; + fftw_real tmp806; + fftw_real tmp786; + fftw_real tmp796; + fftw_real tmp772; + fftw_real tmp790; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp756; + fftw_real tmp798; + fftw_real tmp799; + fftw_real tmp782; + fftw_real tmp760; + fftw_real tmp763; + ASSERT_ALIGNED_DOUBLE; + tmp756 = K707106781 * (tmp720 - tmp721); + tmp757 = tmp755 - tmp756; + tmp795 = tmp755 + tmp756; + tmp798 = tmp766 + tmp767; + tmp799 = tmp769 + tmp770; + tmp800 = (K831469612 * tmp798) + (K555570233 * tmp799); + tmp810 = (K831469612 * tmp799) - (K555570233 * tmp798); + { + fftw_real tmp801; + fftw_real tmp802; + fftw_real tmp775; + fftw_real tmp778; + ASSERT_ALIGNED_DOUBLE; + tmp801 = tmp773 + tmp774; + tmp802 = tmp776 + tmp777; + tmp803 = (K831469612 * 
tmp801) - (K555570233 * tmp802); + tmp811 = (K555570233 * tmp801) + (K831469612 * tmp802); + tmp775 = tmp773 - tmp774; + tmp778 = tmp776 - tmp777; + tmp779 = (K195090322 * tmp775) - (K980785280 * tmp778); + tmp791 = (K980785280 * tmp775) + (K195090322 * tmp778); + } + tmp782 = K707106781 * (tmp643 - tmp640); + tmp783 = tmp781 - tmp782; + tmp805 = tmp781 + tmp782; + tmp760 = (K382683432 * tmp758) - (K923879532 * tmp759); + tmp763 = (K923879532 * tmp761) + (K382683432 * tmp762); + tmp764 = tmp760 - tmp763; + tmp806 = tmp760 + tmp763; + { + fftw_real tmp784; + fftw_real tmp785; + fftw_real tmp768; + fftw_real tmp771; + ASSERT_ALIGNED_DOUBLE; + tmp784 = (K382683432 * tmp761) - (K923879532 * tmp762); + tmp785 = (K382683432 * tmp759) + (K923879532 * tmp758); + tmp786 = tmp784 - tmp785; + tmp796 = tmp785 + tmp784; + tmp768 = tmp766 - tmp767; + tmp771 = tmp769 - tmp770; + tmp772 = (K195090322 * tmp768) + (K980785280 * tmp771); + tmp790 = (K195090322 * tmp771) - (K980785280 * tmp768); + } + } + { + fftw_real tmp765; + fftw_real tmp780; + fftw_real tmp787; + fftw_real tmp788; + ASSERT_ALIGNED_DOUBLE; + tmp765 = tmp757 + tmp764; + tmp780 = tmp772 + tmp779; + c_im(output[46 * ostride]) = tmp765 - tmp780; + c_im(output[14 * ostride]) = tmp765 + tmp780; + tmp787 = tmp783 - tmp786; + tmp788 = tmp779 - tmp772; + c_re(output[62 * ostride]) = tmp787 - tmp788; + c_re(output[30 * ostride]) = tmp787 + tmp788; + } + { + fftw_real tmp793; + fftw_real tmp794; + fftw_real tmp789; + fftw_real tmp792; + ASSERT_ALIGNED_DOUBLE; + tmp793 = tmp783 + tmp786; + tmp794 = tmp790 + tmp791; + c_re(output[46 * ostride]) = tmp793 - tmp794; + c_re(output[14 * ostride]) = tmp793 + tmp794; + tmp789 = tmp757 - tmp764; + tmp792 = tmp790 - tmp791; + c_im(output[62 * ostride]) = tmp789 - tmp792; + c_im(output[30 * ostride]) = tmp789 + tmp792; + } + { + fftw_real tmp797; + fftw_real tmp804; + fftw_real tmp807; + fftw_real tmp808; + ASSERT_ALIGNED_DOUBLE; + tmp797 = tmp795 + tmp796; + tmp804 = tmp800 + tmp803; + 
c_im(output[38 * ostride]) = tmp797 - tmp804; + c_im(output[6 * ostride]) = tmp797 + tmp804; + tmp807 = tmp805 - tmp806; + tmp808 = tmp803 - tmp800; + c_re(output[54 * ostride]) = tmp807 - tmp808; + c_re(output[22 * ostride]) = tmp807 + tmp808; + } + { + fftw_real tmp813; + fftw_real tmp814; + fftw_real tmp809; + fftw_real tmp812; + ASSERT_ALIGNED_DOUBLE; + tmp813 = tmp805 + tmp806; + tmp814 = tmp810 + tmp811; + c_re(output[38 * ostride]) = tmp813 - tmp814; + c_re(output[6 * ostride]) = tmp813 + tmp814; + tmp809 = tmp795 - tmp796; + tmp812 = tmp810 - tmp811; + c_im(output[54 * ostride]) = tmp809 - tmp812; + c_im(output[22 * ostride]) = tmp809 + tmp812; + } + } + { + fftw_real tmp645; + fftw_real tmp735; + fftw_real tmp740; + fftw_real tmp750; + fftw_real tmp743; + fftw_real tmp751; + fftw_real tmp715; + fftw_real tmp731; + fftw_real tmp723; + fftw_real tmp745; + fftw_real tmp660; + fftw_real tmp746; + fftw_real tmp726; + fftw_real tmp736; + fftw_real tmp688; + fftw_real tmp730; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp644; + fftw_real tmp738; + fftw_real tmp739; + fftw_real tmp722; + fftw_real tmp652; + fftw_real tmp659; + ASSERT_ALIGNED_DOUBLE; + tmp644 = K707106781 * (tmp640 + tmp643); + tmp645 = tmp637 - tmp644; + tmp735 = tmp637 + tmp644; + tmp738 = tmp666 + tmp677; + tmp739 = tmp683 + tmp686; + tmp740 = (K980785280 * tmp738) - (K195090322 * tmp739); + tmp750 = (K195090322 * tmp738) + (K980785280 * tmp739); + { + fftw_real tmp741; + fftw_real tmp742; + fftw_real tmp705; + fftw_real tmp714; + ASSERT_ALIGNED_DOUBLE; + tmp741 = tmp693 + tmp704; + tmp742 = tmp710 + tmp713; + tmp743 = (K980785280 * tmp741) + (K195090322 * tmp742); + tmp751 = (K980785280 * tmp742) - (K195090322 * tmp741); + tmp705 = tmp693 - tmp704; + tmp714 = tmp710 - tmp713; + tmp715 = (K555570233 * tmp705) + (K831469612 * tmp714); + tmp731 = (K555570233 * tmp714) - (K831469612 * tmp705); + } + tmp722 = K707106781 * (tmp720 + tmp721); + tmp723 = tmp719 - tmp722; + tmp745 = tmp719 + tmp722; + 
tmp652 = (K923879532 * tmp648) - (K382683432 * tmp651); + tmp659 = (K382683432 * tmp655) + (K923879532 * tmp658); + tmp660 = tmp652 - tmp659; + tmp746 = tmp652 + tmp659; + { + fftw_real tmp724; + fftw_real tmp725; + fftw_real tmp678; + fftw_real tmp687; + ASSERT_ALIGNED_DOUBLE; + tmp724 = (K923879532 * tmp655) - (K382683432 * tmp658); + tmp725 = (K923879532 * tmp651) + (K382683432 * tmp648); + tmp726 = tmp724 - tmp725; + tmp736 = tmp725 + tmp724; + tmp678 = tmp666 - tmp677; + tmp687 = tmp683 - tmp686; + tmp688 = (K555570233 * tmp678) - (K831469612 * tmp687); + tmp730 = (K831469612 * tmp678) + (K555570233 * tmp687); + } + } + { + fftw_real tmp661; + fftw_real tmp716; + fftw_real tmp727; + fftw_real tmp728; + ASSERT_ALIGNED_DOUBLE; + tmp661 = tmp645 - tmp660; + tmp716 = tmp688 - tmp715; + c_im(output[58 * ostride]) = tmp661 - tmp716; + c_im(output[26 * ostride]) = tmp661 + tmp716; + tmp727 = tmp723 + tmp726; + tmp728 = tmp688 + tmp715; + c_re(output[42 * ostride]) = tmp727 - tmp728; + c_re(output[10 * ostride]) = tmp727 + tmp728; + } + { + fftw_real tmp733; + fftw_real tmp734; + fftw_real tmp729; + fftw_real tmp732; + ASSERT_ALIGNED_DOUBLE; + tmp733 = tmp723 - tmp726; + tmp734 = tmp731 - tmp730; + c_re(output[58 * ostride]) = tmp733 - tmp734; + c_re(output[26 * ostride]) = tmp733 + tmp734; + tmp729 = tmp645 + tmp660; + tmp732 = tmp730 + tmp731; + c_im(output[42 * ostride]) = tmp729 - tmp732; + c_im(output[10 * ostride]) = tmp729 + tmp732; + } + { + fftw_real tmp737; + fftw_real tmp744; + fftw_real tmp747; + fftw_real tmp748; + ASSERT_ALIGNED_DOUBLE; + tmp737 = tmp735 - tmp736; + tmp744 = tmp740 - tmp743; + c_im(output[50 * ostride]) = tmp737 - tmp744; + c_im(output[18 * ostride]) = tmp737 + tmp744; + tmp747 = tmp745 + tmp746; + tmp748 = tmp740 + tmp743; + c_re(output[34 * ostride]) = tmp747 - tmp748; + c_re(output[2 * ostride]) = tmp747 + tmp748; + } + { + fftw_real tmp753; + fftw_real tmp754; + fftw_real tmp749; + fftw_real tmp752; + ASSERT_ALIGNED_DOUBLE; + tmp753 
= tmp745 - tmp746; + tmp754 = tmp751 - tmp750; + c_re(output[50 * ostride]) = tmp753 - tmp754; + c_re(output[18 * ostride]) = tmp753 + tmp754; + tmp749 = tmp735 + tmp736; + tmp752 = tmp750 + tmp751; + c_im(output[34 * ostride]) = tmp749 - tmp752; + c_im(output[2 * ostride]) = tmp749 + tmp752; + } + } + { + fftw_real tmp481; + fftw_real tmp555; + fftw_real tmp560; + fftw_real tmp570; + fftw_real tmp563; + fftw_real tmp571; + fftw_real tmp535; + fftw_real tmp551; + fftw_real tmp516; + fftw_real tmp550; + fftw_real tmp543; + fftw_real tmp565; + fftw_real tmp546; + fftw_real tmp556; + fftw_real tmp496; + fftw_real tmp566; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp473; + fftw_real tmp480; + fftw_real tmp558; + fftw_real tmp559; + ASSERT_ALIGNED_DOUBLE; + tmp473 = tmp471 - tmp472; + tmp480 = tmp476 - tmp479; + tmp481 = tmp473 - tmp480; + tmp555 = tmp473 + tmp480; + tmp558 = tmp500 + tmp507; + tmp559 = tmp511 + tmp514; + tmp560 = (K773010453 * tmp558) - (K634393284 * tmp559); + tmp570 = (K634393284 * tmp558) + (K773010453 * tmp559); + } + { + fftw_real tmp561; + fftw_real tmp562; + fftw_real tmp527; + fftw_real tmp534; + ASSERT_ALIGNED_DOUBLE; + tmp561 = tmp519 + tmp526; + tmp562 = tmp530 + tmp533; + tmp563 = (K773010453 * tmp561) + (K634393284 * tmp562); + tmp571 = (K773010453 * tmp562) - (K634393284 * tmp561); + tmp527 = tmp519 - tmp526; + tmp534 = tmp530 - tmp533; + tmp535 = (K098017140 * tmp527) + (K995184726 * tmp534); + tmp551 = (K098017140 * tmp534) - (K995184726 * tmp527); + } + { + fftw_real tmp508; + fftw_real tmp515; + fftw_real tmp539; + fftw_real tmp542; + ASSERT_ALIGNED_DOUBLE; + tmp508 = tmp500 - tmp507; + tmp515 = tmp511 - tmp514; + tmp516 = (K098017140 * tmp508) - (K995184726 * tmp515); + tmp550 = (K995184726 * tmp508) + (K098017140 * tmp515); + tmp539 = tmp537 - tmp538; + tmp542 = tmp540 - tmp541; + tmp543 = tmp539 - tmp542; + tmp565 = tmp539 + tmp542; + } + { + fftw_real tmp544; + fftw_real tmp545; + fftw_real tmp488; + fftw_real tmp495; + 
ASSERT_ALIGNED_DOUBLE; + tmp544 = (K195090322 * tmp491) - (K980785280 * tmp494); + tmp545 = (K195090322 * tmp487) + (K980785280 * tmp484); + tmp546 = tmp544 - tmp545; + tmp556 = tmp545 + tmp544; + tmp488 = (K195090322 * tmp484) - (K980785280 * tmp487); + tmp495 = (K980785280 * tmp491) + (K195090322 * tmp494); + tmp496 = tmp488 - tmp495; + tmp566 = tmp488 + tmp495; + } + { + fftw_real tmp497; + fftw_real tmp536; + fftw_real tmp547; + fftw_real tmp548; + ASSERT_ALIGNED_DOUBLE; + tmp497 = tmp481 - tmp496; + tmp536 = tmp516 - tmp535; + c_im(output[63 * ostride]) = tmp497 - tmp536; + c_im(output[31 * ostride]) = tmp497 + tmp536; + tmp547 = tmp543 + tmp546; + tmp548 = tmp516 + tmp535; + c_re(output[47 * ostride]) = tmp547 - tmp548; + c_re(output[15 * ostride]) = tmp547 + tmp548; + } + { + fftw_real tmp553; + fftw_real tmp554; + fftw_real tmp549; + fftw_real tmp552; + ASSERT_ALIGNED_DOUBLE; + tmp553 = tmp543 - tmp546; + tmp554 = tmp551 - tmp550; + c_re(output[63 * ostride]) = tmp553 - tmp554; + c_re(output[31 * ostride]) = tmp553 + tmp554; + tmp549 = tmp481 + tmp496; + tmp552 = tmp550 + tmp551; + c_im(output[47 * ostride]) = tmp549 - tmp552; + c_im(output[15 * ostride]) = tmp549 + tmp552; + } + { + fftw_real tmp557; + fftw_real tmp564; + fftw_real tmp567; + fftw_real tmp568; + ASSERT_ALIGNED_DOUBLE; + tmp557 = tmp555 - tmp556; + tmp564 = tmp560 - tmp563; + c_im(output[55 * ostride]) = tmp557 - tmp564; + c_im(output[23 * ostride]) = tmp557 + tmp564; + tmp567 = tmp565 + tmp566; + tmp568 = tmp560 + tmp563; + c_re(output[39 * ostride]) = tmp567 - tmp568; + c_re(output[7 * ostride]) = tmp567 + tmp568; + } + { + fftw_real tmp573; + fftw_real tmp574; + fftw_real tmp569; + fftw_real tmp572; + ASSERT_ALIGNED_DOUBLE; + tmp573 = tmp565 - tmp566; + tmp574 = tmp571 - tmp570; + c_re(output[55 * ostride]) = tmp573 - tmp574; + c_re(output[23 * ostride]) = tmp573 + tmp574; + tmp569 = tmp555 + tmp556; + tmp572 = tmp570 + tmp571; + c_im(output[39 * ostride]) = tmp569 - tmp572; + 
c_im(output[7 * ostride]) = tmp569 + tmp572; + } + } + { + fftw_real tmp577; + fftw_real tmp615; + fftw_real tmp620; + fftw_real tmp630; + fftw_real tmp623; + fftw_real tmp631; + fftw_real tmp599; + fftw_real tmp611; + fftw_real tmp592; + fftw_real tmp610; + fftw_real tmp603; + fftw_real tmp625; + fftw_real tmp606; + fftw_real tmp616; + fftw_real tmp584; + fftw_real tmp626; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp575; + fftw_real tmp576; + fftw_real tmp618; + fftw_real tmp619; + ASSERT_ALIGNED_DOUBLE; + tmp575 = tmp471 + tmp472; + tmp576 = tmp541 + tmp540; + tmp577 = tmp575 - tmp576; + tmp615 = tmp575 + tmp576; + tmp618 = tmp586 + tmp587; + tmp619 = tmp589 + tmp590; + tmp620 = (K956940335 * tmp618) + (K290284677 * tmp619); + tmp630 = (K956940335 * tmp619) - (K290284677 * tmp618); + } + { + fftw_real tmp621; + fftw_real tmp622; + fftw_real tmp595; + fftw_real tmp598; + ASSERT_ALIGNED_DOUBLE; + tmp621 = tmp593 + tmp594; + tmp622 = tmp596 + tmp597; + tmp623 = (K956940335 * tmp621) - (K290284677 * tmp622); + tmp631 = (K290284677 * tmp621) + (K956940335 * tmp622); + tmp595 = tmp593 - tmp594; + tmp598 = tmp596 - tmp597; + tmp599 = (K471396736 * tmp595) - (K881921264 * tmp598); + tmp611 = (K881921264 * tmp595) + (K471396736 * tmp598); + } + { + fftw_real tmp588; + fftw_real tmp591; + fftw_real tmp601; + fftw_real tmp602; + ASSERT_ALIGNED_DOUBLE; + tmp588 = tmp586 - tmp587; + tmp591 = tmp589 - tmp590; + tmp592 = (K471396736 * tmp588) + (K881921264 * tmp591); + tmp610 = (K471396736 * tmp591) - (K881921264 * tmp588); + tmp601 = tmp537 + tmp538; + tmp602 = tmp476 + tmp479; + tmp603 = tmp601 - tmp602; + tmp625 = tmp601 + tmp602; + } + { + fftw_real tmp604; + fftw_real tmp605; + fftw_real tmp580; + fftw_real tmp583; + ASSERT_ALIGNED_DOUBLE; + tmp604 = (K831469612 * tmp581) - (K555570233 * tmp582); + tmp605 = (K831469612 * tmp579) + (K555570233 * tmp578); + tmp606 = tmp604 - tmp605; + tmp616 = tmp605 + tmp604; + tmp580 = (K831469612 * tmp578) - (K555570233 * tmp579); + 
tmp583 = (K555570233 * tmp581) + (K831469612 * tmp582); + tmp584 = tmp580 - tmp583; + tmp626 = tmp580 + tmp583; + } + { + fftw_real tmp585; + fftw_real tmp600; + fftw_real tmp607; + fftw_real tmp608; + ASSERT_ALIGNED_DOUBLE; + tmp585 = tmp577 + tmp584; + tmp600 = tmp592 + tmp599; + c_im(output[43 * ostride]) = tmp585 - tmp600; + c_im(output[11 * ostride]) = tmp585 + tmp600; + tmp607 = tmp603 - tmp606; + tmp608 = tmp599 - tmp592; + c_re(output[59 * ostride]) = tmp607 - tmp608; + c_re(output[27 * ostride]) = tmp607 + tmp608; + } + { + fftw_real tmp613; + fftw_real tmp614; + fftw_real tmp609; + fftw_real tmp612; + ASSERT_ALIGNED_DOUBLE; + tmp613 = tmp603 + tmp606; + tmp614 = tmp610 + tmp611; + c_re(output[43 * ostride]) = tmp613 - tmp614; + c_re(output[11 * ostride]) = tmp613 + tmp614; + tmp609 = tmp577 - tmp584; + tmp612 = tmp610 - tmp611; + c_im(output[59 * ostride]) = tmp609 - tmp612; + c_im(output[27 * ostride]) = tmp609 + tmp612; + } + { + fftw_real tmp617; + fftw_real tmp624; + fftw_real tmp627; + fftw_real tmp628; + ASSERT_ALIGNED_DOUBLE; + tmp617 = tmp615 + tmp616; + tmp624 = tmp620 + tmp623; + c_im(output[35 * ostride]) = tmp617 - tmp624; + c_im(output[3 * ostride]) = tmp617 + tmp624; + tmp627 = tmp625 - tmp626; + tmp628 = tmp623 - tmp620; + c_re(output[51 * ostride]) = tmp627 - tmp628; + c_re(output[19 * ostride]) = tmp627 + tmp628; + } + { + fftw_real tmp633; + fftw_real tmp634; + fftw_real tmp629; + fftw_real tmp632; + ASSERT_ALIGNED_DOUBLE; + tmp633 = tmp625 + tmp626; + tmp634 = tmp630 + tmp631; + c_re(output[35 * ostride]) = tmp633 - tmp634; + c_re(output[3 * ostride]) = tmp633 + tmp634; + tmp629 = tmp615 - tmp616; + tmp632 = tmp630 - tmp631; + c_im(output[51 * ostride]) = tmp629 - tmp632; + c_im(output[19 * ostride]) = tmp629 + tmp632; + } + } +} + +fftw_codelet_desc fftwi_no_twiddle_64_desc = { + "fftwi_no_twiddle_64", + (void (*)()) fftwi_no_twiddle_64, + 64, + FFTW_BACKWARD, + FFTW_NOTW, + 1420, + 0, + (const int *) 0, +}; diff --git 
a/src/fftw/fni_7.c b/src/fftw/fni_7.c new file mode 100644 index 0000000..6fe8dc8 --- /dev/null +++ b/src/fftw/fni_7.c @@ -0,0 +1,200 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Mon Mar 24 02:06:20 EST 2003 */ + +#include "fftw-int.h" +#include "fftw.h" + +/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddleinv 7 */ + +/* + * This function contains 60 FP additions, 36 FP multiplications, + * (or, 60 additions, 36 multiplications, 0 fused multiply/add), + * 22 stack variables, and 28 memory accesses + */ +static const fftw_real K222520933 = +FFTW_KONST(+0.222520933956314404288902564496794759466355569);/* = -cos(4*pi/7) */ +static const fftw_real K900968867 = +FFTW_KONST(+0.900968867902419126236102319507445051165919162);/* = -cos(6*pi/7) */ +static const fftw_real K623489801 = +FFTW_KONST(+0.623489801858733530525004884004239810632274731);/* = cos(2*pi/7) */ +static const fftw_real K781831482 = +FFTW_KONST(+0.781831482468029808708444526674057750232334519);/* = sin(2*pi/7) */ +static const fftw_real K433883739 = +FFTW_KONST(+0.433883739117558120475768332848358754609990728);/* = sin(6*pi/7) */ +static const fftw_real K974927912 =
+FFTW_KONST(+0.974927912181823607018131682993931217232785801);/* = sin(4*pi/7) */ + +/* + * Generator Id's : + * $Id: fni_7.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fni_7.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fni_7.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + */ +/* fftwi_no_twiddle_7: backward 7-point complex DFT with no twiddle factors. Reads input[n * istride] for n = 0..6 and writes output[k * ostride] = sum over n of input[n * istride] * exp(+2*pi*i*n*k/7) for k = 0..6; the result is not divided by 7. */ +void fftwi_no_twiddle_7(const fftw_complex *input, fftw_complex *output, + int istride, int ostride) +{ + fftw_real tmp1; + fftw_real tmp15; + fftw_real tmp4; + fftw_real tmp11; + fftw_real tmp21; + fftw_real tmp31; + fftw_real tmp7; + fftw_real tmp13; + fftw_real tmp24; + fftw_real tmp33; + fftw_real tmp10; + fftw_real tmp12; + fftw_real tmp18; + fftw_real tmp32; + ASSERT_ALIGNED_DOUBLE; + tmp1 = c_re(input[0]); + tmp15 = c_im(input[0]); + {/* inputs 1 and 6: tmp4/tmp21 are the real/imag sums; tmp11 = re1 - re6 but tmp31 = im6 - im1 (opposite order) */ + fftw_real tmp2; + fftw_real tmp3; + fftw_real tmp19; + fftw_real tmp20; + ASSERT_ALIGNED_DOUBLE; + tmp2 = c_re(input[istride]); + tmp3 = c_re(input[6 * istride]); + tmp4 = tmp2 + tmp3; + tmp11 = tmp2 - tmp3; + tmp19 = c_im(input[istride]); + tmp20 = c_im(input[6 * istride]); + tmp21 = tmp19 + tmp20; + tmp31 = tmp20 - tmp19; + } + {/* inputs 2 and 5: tmp7/tmp24 are the real/imag sums; tmp13 = re2 - re5, tmp33 = im5 - im2 */ + fftw_real tmp5; + fftw_real tmp6; + fftw_real tmp22; + fftw_real tmp23; + ASSERT_ALIGNED_DOUBLE; + tmp5 = c_re(input[2 * istride]); + tmp6 = c_re(input[5 * istride]); + tmp7 = tmp5 + tmp6; + tmp13 = tmp5 - tmp6; + tmp22 = c_im(input[2 * istride]); + tmp23 = c_im(input[5 * istride]); + tmp24 = tmp22 + tmp23; + tmp33 = tmp23 - tmp22; + } + {/* inputs 3 and 4: tmp10/tmp18 are the real/imag sums; tmp12 = re3 - re4, tmp32 = im4 - im3 */ + fftw_real tmp8; + fftw_real tmp9; + fftw_real tmp16; + fftw_real tmp17; + ASSERT_ALIGNED_DOUBLE; + tmp8 = c_re(input[3 * istride]); + tmp9 = c_re(input[4 * istride]); + tmp10 = tmp8 + tmp9; + tmp12 = tmp8 - tmp9; + tmp16 = c_im(input[3 * istride]); + tmp17 = c_im(input[4 * istride]); + tmp18 = tmp16 + tmp17; + tmp32 = tmp17 - tmp16; + } + {/* outputs: each pair (k, 7 - k) shares one cosine-weighted term built from the sums and differs only in the sign of the sine-weighted term built from the differences */ + fftw_real tmp36; + fftw_real tmp35; + fftw_real tmp26; + fftw_real tmp27; + ASSERT_ALIGNED_DOUBLE; + c_re(output[0]) = tmp1 + tmp4 + tmp7 + tmp10;/* output[0] is the plain sum of the inputs (real part here) */ + tmp36 = + (K974927912 * tmp33) + (K433883739 * tmp32) + + (K781831482 * tmp31); + tmp35 = + tmp1 + (K623489801 *
tmp4) - (K900968867 * tmp10) - + (K222520933 * tmp7); + c_re(output[6 * ostride]) = tmp35 - tmp36; + c_re(output[ostride]) = tmp35 + tmp36; + { + fftw_real tmp38; + fftw_real tmp37; + fftw_real tmp34; + fftw_real tmp30; + ASSERT_ALIGNED_DOUBLE; + tmp38 = + (K974927912 * tmp32) + (K433883739 * tmp31) - + (K781831482 * tmp33); + tmp37 = + tmp1 + (K623489801 * tmp7) - (K222520933 * tmp10) - + (K900968867 * tmp4); + c_re(output[4 * ostride]) = tmp37 - tmp38; + c_re(output[3 * ostride]) = tmp37 + tmp38; + tmp34 = + (K974927912 * tmp31) - (K781831482 * tmp32) - + (K433883739 * tmp33); + tmp30 = + tmp1 + (K623489801 * tmp10) - (K900968867 * tmp7) - + (K222520933 * tmp4); + c_re(output[5 * ostride]) = tmp30 - tmp34; + c_re(output[2 * ostride]) = tmp30 + tmp34; + } + c_im(output[0]) = tmp15 + tmp24 + tmp18 + tmp21;/* imaginary part of output[0]: plain sum of the input imaginary parts */ + tmp26 = + (K433883739 * tmp11) + (K974927912 * tmp12) - + (K781831482 * tmp13); + tmp27 = + tmp15 + (K623489801 * tmp24) - (K900968867 * tmp21) - + (K222520933 * tmp18); + c_im(output[3 * ostride]) = tmp26 + tmp27; + c_im(output[4 * ostride]) = tmp27 - tmp26; + { + fftw_real tmp14; + fftw_real tmp25; + fftw_real tmp28; + fftw_real tmp29; + ASSERT_ALIGNED_DOUBLE; + tmp14 = + (K974927912 * tmp11) - (K781831482 * tmp12) - + (K433883739 * tmp13); + tmp25 = + tmp15 + (K623489801 * tmp18) - (K222520933 * tmp21) - + (K900968867 * tmp24); + c_im(output[2 * ostride]) = tmp14 + tmp25; + c_im(output[5 * ostride]) = tmp25 - tmp14; + tmp28 = + (K781831482 * tmp11) + (K974927912 * tmp13) + + (K433883739 * tmp12); + tmp29 = + tmp15 + (K623489801 * tmp21) - (K900968867 * tmp18) - + (K222520933 * tmp24); + c_im(output[ostride]) = tmp28 + tmp29; + c_im(output[6 * ostride]) = tmp29 - tmp28; + } + } +} +/* Descriptor for fftwi_no_twiddle_7: its name string, a pointer to the function, then 7, FFTW_BACKWARD, FFTW_NOTW, 166, 0 and a null int pointer. NOTE(review): the field meanings come from fftw_codelet_desc, which is not visible here; presumably 7 is the transform size and 166 the codelet signature -- confirm in fftw-int.h. */ +fftw_codelet_desc fftwi_no_twiddle_7_desc = { + "fftwi_no_twiddle_7", + (void (*)()) fftwi_no_twiddle_7, + 7, + FFTW_BACKWARD, + FFTW_NOTW, + 166, + 0, + (const int *) 0, +}; diff --git a/src/fftw/fni_8.c b/src/fftw/fni_8.c new file mode 100644 index 0000000..9bee5ad ---
/dev/null +++ b/src/fftw/fni_8.c @@ -0,0 +1,202 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Mon Mar 24 02:06:25 EST 2003 */ + +#include "fftw-int.h" +#include "fftw.h" + +/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddleinv 8 */ + +/* + * This function contains 52 FP additions, 4 FP multiplications, + * (or, 52 additions, 4 multiplications, 0 fused multiply/add), + * 26 stack variables, and 32 memory accesses + */ +/* The only multiplier used by the function below; its value is numerically 1/sqrt(2), and the name carries the first nine decimal digits. */ static const fftw_real K707106781 = +FFTW_KONST(+0.707106781186547524400844362104849039284835938); + +/* + * Generator Id's : + * $Id: fni_8.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fni_8.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fni_8.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + */ + +/* fftwi_no_twiddle_8: straight-line 8-point complex transform, registered below with direction FFTW_BACKWARD and type FFTW_NOTW. Reads input[k * istride] and writes output[k * ostride] for k = 0..7; input is only read. */ void fftwi_no_twiddle_8(const fftw_complex *input, fftw_complex *output, + int istride, int ostride) +{ + fftw_real tmp3; + fftw_real tmp37; + fftw_real tmp18; + fftw_real tmp23; + fftw_real tmp6; + fftw_real tmp24; + fftw_real tmp21; + fftw_real tmp38; + fftw_real tmp13; + fftw_real tmp49; + fftw_real tmp35; + fftw_real tmp43; + 
fftw_real tmp10; + fftw_real tmp48; + fftw_real tmp30; + fftw_real tmp42; + ASSERT_ALIGNED_DOUBLE; + /* load stage: inputs are combined in pairs four apart (0 and 4, 2 and 6, 7 and 3, 1 and 5) into sums and differences */ { + fftw_real tmp1; + fftw_real tmp2; + fftw_real tmp19; + fftw_real tmp20; + ASSERT_ALIGNED_DOUBLE; + tmp1 = c_re(input[0]); + tmp2 = c_re(input[4 * istride]); + tmp3 = tmp1 + tmp2; + tmp37 = tmp1 - tmp2; + { + fftw_real tmp16; + fftw_real tmp17; + fftw_real tmp4; + fftw_real tmp5; + ASSERT_ALIGNED_DOUBLE; + tmp16 = c_im(input[0]); + tmp17 = c_im(input[4 * istride]); + tmp18 = tmp16 + tmp17; + tmp23 = tmp16 - tmp17; + tmp4 = c_re(input[2 * istride]); + tmp5 = c_re(input[6 * istride]); + tmp6 = tmp4 + tmp5; + tmp24 = tmp4 - tmp5; + } + tmp19 = c_im(input[2 * istride]); + tmp20 = c_im(input[6 * istride]); + tmp21 = tmp19 + tmp20; + tmp38 = tmp19 - tmp20; + /* inputs 7 and 3 */ { + fftw_real tmp11; + fftw_real tmp12; + fftw_real tmp31; + fftw_real tmp32; + fftw_real tmp33; + fftw_real tmp34; + ASSERT_ALIGNED_DOUBLE; + tmp11 = c_re(input[7 * istride]); + tmp12 = c_re(input[3 * istride]); + tmp31 = tmp11 - tmp12; + tmp32 = c_im(input[7 * istride]); + tmp33 = c_im(input[3 * istride]); + tmp34 = tmp32 - tmp33; + tmp13 = tmp11 + tmp12; + tmp49 = tmp32 + tmp33; + tmp35 = tmp31 + tmp34; + tmp43 = tmp34 - tmp31; + } + /* inputs 1 and 5 */ { + fftw_real tmp8; + fftw_real tmp9; + fftw_real tmp26; + fftw_real tmp27; + fftw_real tmp28; + fftw_real tmp29; + ASSERT_ALIGNED_DOUBLE; + tmp8 = c_re(input[istride]); + tmp9 = c_re(input[5 * istride]); + tmp26 = tmp8 - tmp9; + tmp27 = c_im(input[istride]); + tmp28 = c_im(input[5 * istride]); + tmp29 = tmp27 - tmp28; + tmp10 = tmp8 + tmp9; + tmp48 = tmp27 + tmp28; + tmp30 = tmp26 - tmp29; + tmp42 = tmp26 + tmp29; + } + } + /* real parts of outputs 4, 0 and imaginary parts of outputs 2, 6 */ { + fftw_real tmp7; + fftw_real tmp14; + fftw_real tmp15; + fftw_real tmp22; + ASSERT_ALIGNED_DOUBLE; + tmp7 = tmp3 + tmp6; + tmp14 = tmp10 + tmp13; + c_re(output[4 * ostride]) = tmp7 - tmp14; + c_re(output[0]) = tmp7 + tmp14; + tmp15 = tmp10 - tmp13; + tmp22 = tmp18 - tmp21; + c_im(output[2 * ostride]) = tmp15 + tmp22; + c_im(output[6 * ostride]) = tmp22 - 
tmp15; + } + /* imaginary parts of outputs 4, 0 and real parts of outputs 6, 2 */ { + fftw_real tmp47; + fftw_real tmp50; + fftw_real tmp51; + fftw_real tmp52; + ASSERT_ALIGNED_DOUBLE; + tmp47 = tmp18 + tmp21; + tmp50 = tmp48 + tmp49; + c_im(output[4 * ostride]) = tmp47 - tmp50; + c_im(output[0]) = tmp47 + tmp50; + tmp51 = tmp3 - tmp6; + tmp52 = tmp49 - tmp48; + c_re(output[6 * ostride]) = tmp51 - tmp52; + c_re(output[2 * ostride]) = tmp51 + tmp52; + } + /* imaginary parts of outputs 7, 3 and real parts of outputs 5, 1; these use the K707106781 multiplier */ { + fftw_real tmp25; + fftw_real tmp36; + fftw_real tmp39; + fftw_real tmp40; + ASSERT_ALIGNED_DOUBLE; + tmp25 = tmp23 - tmp24; + tmp36 = K707106781 * (tmp30 - tmp35); + c_im(output[7 * ostride]) = tmp25 - tmp36; + c_im(output[3 * ostride]) = tmp25 + tmp36; + tmp39 = tmp37 - tmp38; + tmp40 = K707106781 * (tmp30 + tmp35); + c_re(output[5 * ostride]) = tmp39 - tmp40; + c_re(output[ostride]) = tmp39 + tmp40; + } + /* real parts of outputs 7, 3 and imaginary parts of outputs 5, 1 */ { + fftw_real tmp45; + fftw_real tmp46; + fftw_real tmp41; + fftw_real tmp44; + ASSERT_ALIGNED_DOUBLE; + tmp45 = tmp37 + tmp38; + tmp46 = K707106781 * (tmp43 - tmp42); + c_re(output[7 * ostride]) = tmp45 - tmp46; + c_re(output[3 * ostride]) = tmp45 + tmp46; + tmp41 = tmp24 + tmp23; + tmp44 = K707106781 * (tmp42 + tmp43); + c_im(output[5 * ostride]) = tmp41 - tmp44; + c_im(output[ostride]) = tmp41 + tmp44; + } +} + +/* Descriptor for the codelet above: name string, function pointer, size 8, FFTW_BACKWARD, FFTW_NOTW, then three trailing fields (188, 0, null pointer) whose meaning is set by the fftw_codelet_desc declaration, which is not visible here (presumably in fftw-int.h; confirm there). */ fftw_codelet_desc fftwi_no_twiddle_8_desc = { + "fftwi_no_twiddle_8", + (void (*)()) fftwi_no_twiddle_8, + 8, + FFTW_BACKWARD, + FFTW_NOTW, + 188, + 0, + (const int *) 0, +}; diff --git a/src/fftw/fni_9.c b/src/fftw/fni_9.c new file mode 100644 index 0000000..ce9f1f5 --- /dev/null +++ b/src/fftw/fni_9.c @@ -0,0 +1,283 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Mon Mar 24 02:06:25 EST 2003 */ + +#include "fftw-int.h" +#include "fftw.h" + +/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddleinv 9 */ + +/* + * This function contains 80 FP additions, 40 FP multiplications, + * (or, 60 additions, 20 multiplications, 20 fused multiply/add), + * 30 stack variables, and 36 memory accesses + */ +/* Multipliers used by the function below; each name carries the first nine decimal digits of its value. */ static const fftw_real K642787609 = +FFTW_KONST(+0.642787609686539326322643409907263432907559884); +static const fftw_real K766044443 = +FFTW_KONST(+0.766044443118978035202392650555416673935832457); +static const fftw_real K939692620 = +FFTW_KONST(+0.939692620785908384054109277324731469936208134); +static const fftw_real K342020143 = +FFTW_KONST(+0.342020143325668733044099614682259580763083368); +static const fftw_real K984807753 = +FFTW_KONST(+0.984807753012208059366743024589523013670643252); +static const fftw_real K173648177 = +FFTW_KONST(+0.173648177666930348851716626769314796000375677); +static const fftw_real K866025403 = +FFTW_KONST(+0.866025403784438646763723170752936183471402627); +static const fftw_real K500000000 = +FFTW_KONST(+0.500000000000000000000000000000000000000000000); + +/* + * Generator Id's : + * $Id: fni_9.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fni_9.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: fni_9.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + */ + +/* fftwi_no_twiddle_9: straight-line 9-point complex transform, registered below with direction FFTW_BACKWARD and type FFTW_NOTW. Reads input[k * istride] and writes output[k * ostride] for k = 0..8; input is only read. */ void 
fftwi_no_twiddle_9(const fftw_complex *input, fftw_complex *output, + int istride, int ostride) +{ + fftw_real tmp5; + fftw_real tmp37; + fftw_real tmp57; + fftw_real tmp22; + fftw_real tmp56; + fftw_real tmp38; + fftw_real tmp10; + fftw_real tmp42; + fftw_real tmp66; + fftw_real tmp27; + fftw_real tmp45; + fftw_real tmp67; + fftw_real tmp15; + fftw_real tmp52; + fftw_real tmp69; + fftw_real tmp32; + fftw_real tmp49; + fftw_real tmp70; + ASSERT_ALIGNED_DOUBLE; + /* real parts of inputs 0, 3, 6 */ { + fftw_real tmp1; + fftw_real tmp2; + fftw_real tmp3; + fftw_real tmp4; + ASSERT_ALIGNED_DOUBLE; + tmp1 = c_re(input[0]); + tmp2 = c_re(input[3 * istride]); + tmp3 = c_re(input[6 * istride]); + tmp4 = tmp2 + tmp3; + tmp5 = tmp1 + tmp4; + tmp37 = tmp1 - (K500000000 * tmp4); + tmp57 = K866025403 * (tmp2 - tmp3); + } + /* imaginary parts of inputs 0, 3, 6 */ { + fftw_real tmp18; + fftw_real tmp19; + fftw_real tmp20; + fftw_real tmp21; + ASSERT_ALIGNED_DOUBLE; + tmp18 = c_im(input[0]); + tmp19 = c_im(input[3 * istride]); + tmp20 = c_im(input[6 * istride]); + tmp21 = tmp19 + tmp20; + tmp22 = tmp18 + tmp21; + tmp56 = tmp18 - (K500000000 * tmp21); + tmp38 = K866025403 * (tmp20 - tmp19); + } + /* inputs 1, 4, 7 */ { + fftw_real tmp6; + fftw_real tmp23; + fftw_real tmp9; + fftw_real tmp44; + fftw_real tmp26; + fftw_real tmp41; + fftw_real tmp40; + fftw_real tmp43; + ASSERT_ALIGNED_DOUBLE; + tmp6 = c_re(input[istride]); + tmp23 = c_im(input[istride]); + { + fftw_real tmp7; + fftw_real tmp8; + fftw_real tmp24; + fftw_real tmp25; + ASSERT_ALIGNED_DOUBLE; + tmp7 = c_re(input[4 * istride]); + tmp8 = c_re(input[7 * istride]); + tmp9 = tmp7 + tmp8; + tmp44 = K866025403 * (tmp7 - tmp8); + tmp24 = c_im(input[4 * istride]); + tmp25 = c_im(input[7 * istride]); + tmp26 = tmp24 + tmp25; + tmp41 = K866025403 * (tmp25 - tmp24); + } + tmp10 = tmp6 + tmp9; + tmp40 = tmp6 - (K500000000 * tmp9); + tmp42 = tmp40 - tmp41; + tmp66 = tmp40 + tmp41; + tmp27 = tmp23 + tmp26; + tmp43 = tmp23 - (K500000000 * tmp26); + tmp45 = tmp43 - tmp44; + tmp67 = tmp44 + tmp43; + } + /* inputs 2, 5, 8 */ { + fftw_real tmp11; + 
fftw_real tmp28; + fftw_real tmp14; + fftw_real tmp48; + fftw_real tmp31; + fftw_real tmp51; + fftw_real tmp50; + fftw_real tmp47; + ASSERT_ALIGNED_DOUBLE; + tmp11 = c_re(input[2 * istride]); + tmp28 = c_im(input[2 * istride]); + { + fftw_real tmp12; + fftw_real tmp13; + fftw_real tmp29; + fftw_real tmp30; + ASSERT_ALIGNED_DOUBLE; + tmp12 = c_re(input[5 * istride]); + tmp13 = c_re(input[8 * istride]); + tmp14 = tmp12 + tmp13; + tmp48 = K866025403 * (tmp12 - tmp13); + tmp29 = c_im(input[5 * istride]); + tmp30 = c_im(input[8 * istride]); + tmp31 = tmp29 + tmp30; + tmp51 = K866025403 * (tmp30 - tmp29); + } + tmp15 = tmp11 + tmp14; + tmp50 = tmp11 - (K500000000 * tmp14); + tmp52 = tmp50 - tmp51; + tmp69 = tmp50 + tmp51; + tmp32 = tmp28 + tmp31; + tmp47 = tmp28 - (K500000000 * tmp31); + tmp49 = tmp47 - tmp48; + tmp70 = tmp48 + tmp47; + } + /* outputs 0, 3, 6 */ { + fftw_real tmp36; + fftw_real tmp16; + fftw_real tmp35; + fftw_real tmp17; + fftw_real tmp33; + fftw_real tmp34; + ASSERT_ALIGNED_DOUBLE; + tmp36 = K866025403 * (tmp32 - tmp27); + tmp16 = tmp10 + tmp15; + tmp35 = tmp5 - (K500000000 * tmp16); + c_re(output[0]) = tmp5 + tmp16; + c_re(output[3 * ostride]) = tmp35 + tmp36; + c_re(output[6 * ostride]) = tmp35 - tmp36; + tmp17 = K866025403 * (tmp10 - tmp15); + tmp33 = tmp27 + tmp32; + tmp34 = tmp22 - (K500000000 * tmp33); + c_im(output[3 * ostride]) = tmp17 + tmp34; + c_im(output[6 * ostride]) = tmp34 - tmp17; + c_im(output[0]) = tmp22 + tmp33; + } + /* outputs 2, 5, 8 */ { + fftw_real tmp39; + fftw_real tmp61; + fftw_real tmp64; + fftw_real tmp58; + fftw_real tmp54; + fftw_real tmp55; + fftw_real tmp63; + fftw_real tmp62; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp59; + fftw_real tmp60; + fftw_real tmp46; + fftw_real tmp53; + ASSERT_ALIGNED_DOUBLE; + tmp39 = tmp37 - tmp38; + tmp59 = (K173648177 * tmp45) + (K984807753 * tmp42); + tmp60 = (K342020143 * tmp52) - (K939692620 * tmp49); + tmp61 = tmp59 + tmp60; + tmp64 = K866025403 * (tmp60 - tmp59); + tmp58 = tmp56 - tmp57; + tmp46 = (K173648177 * tmp42) - 
(K984807753 * tmp45); + tmp53 = (K342020143 * tmp49) + (K939692620 * tmp52); + tmp54 = tmp46 - tmp53; + tmp55 = K866025403 * (tmp46 + tmp53); + } + c_re(output[2 * ostride]) = tmp39 + tmp54; + tmp63 = tmp39 - (K500000000 * tmp54); + c_re(output[8 * ostride]) = tmp63 - tmp64; + c_re(output[5 * ostride]) = tmp63 + tmp64; + c_im(output[2 * ostride]) = tmp58 + tmp61; + tmp62 = tmp58 - (K500000000 * tmp61); + c_im(output[5 * ostride]) = tmp55 + tmp62; + c_im(output[8 * ostride]) = tmp62 - tmp55; + } + /* outputs 1, 4, 7 */ { + fftw_real tmp65; + fftw_real tmp77; + fftw_real tmp80; + fftw_real tmp74; + fftw_real tmp72; + fftw_real tmp73; + fftw_real tmp79; + fftw_real tmp78; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp75; + fftw_real tmp76; + fftw_real tmp68; + fftw_real tmp71; + ASSERT_ALIGNED_DOUBLE; + tmp65 = tmp37 + tmp38; + tmp75 = (K766044443 * tmp67) + (K642787609 * tmp66); + tmp76 = (K173648177 * tmp70) + (K984807753 * tmp69); + tmp77 = tmp75 + tmp76; + tmp80 = K866025403 * (tmp76 - tmp75); + tmp74 = tmp57 + tmp56; + tmp68 = (K766044443 * tmp66) - (K642787609 * tmp67); + tmp71 = (K173648177 * tmp69) - (K984807753 * tmp70); + tmp72 = tmp68 + tmp71; + tmp73 = K866025403 * (tmp68 - tmp71); + } + c_re(output[ostride]) = tmp65 + tmp72; + tmp79 = tmp65 - (K500000000 * tmp72); + c_re(output[7 * ostride]) = tmp79 - tmp80; + c_re(output[4 * ostride]) = tmp79 + tmp80; + c_im(output[ostride]) = tmp74 + tmp77; + tmp78 = tmp74 - (K500000000 * tmp77); + c_im(output[4 * ostride]) = tmp73 + tmp78; + c_im(output[7 * ostride]) = tmp78 - tmp73; + } +} + +/* Descriptor for the codelet above: name string, function pointer, size 9, FFTW_BACKWARD, FFTW_NOTW, then three trailing fields (210, 0, null pointer) whose meaning is set by the fftw_codelet_desc declaration, which is not visible here (presumably in fftw-int.h; confirm there). */ fftw_codelet_desc fftwi_no_twiddle_9_desc = { + "fftwi_no_twiddle_9", + (void (*)()) fftwi_no_twiddle_9, + 9, + FFTW_BACKWARD, + FFTW_NOTW, + 210, + 0, + (const int *) 0, +}; diff --git a/src/fftw/ftw_10.c b/src/fftw/ftw_10.c new file mode 100644 index 0000000..fea0234 --- /dev/null +++ b/src/fftw/ftw_10.c @@ -0,0 +1,378 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can 
redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Mon Mar 24 02:07:41 EST 2003 */ + +#include "fftw-int.h" +#include "fftw.h" + +/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -twiddle 10 */ + +/* + * This function contains 102 FP additions, 60 FP multiplications, + * (or, 72 additions, 30 multiplications, 30 fused multiply/add), + * 42 stack variables, and 40 memory accesses + */ +/* Multipliers used by the function below; each name carries the first nine decimal digits of its value. */ static const fftw_real K587785252 = +FFTW_KONST(+0.587785252292473129168705954639072768597652438); +static const fftw_real K951056516 = +FFTW_KONST(+0.951056516295153572116439333379382143405698634); +static const fftw_real K250000000 = +FFTW_KONST(+0.250000000000000000000000000000000000000000000); +static const fftw_real K559016994 = +FFTW_KONST(+0.559016994374947424102293417182819058860154590); + +/* + * Generator Id's : + * $Id: ftw_10.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: ftw_10.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: ftw_10.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + */ + +/* fftw_twiddle_10: in-place 10-point twiddle codelet, registered below with direction FFTW_FORWARD and type FFTW_TWIDDLE. The loop runs m times (not at all when m is zero or negative); after each pass inout advances by dist elements and W by 9. In each pass the element at inout[k * iostride], k = 1..9, is first complex-multiplied by W[k - 1], element 0 is taken as is, and the ten results overwrite inout[0] through inout[9 * iostride]. */ void fftw_twiddle_10(fftw_complex *A, const fftw_complex *W, int iostride, + int m, int dist) +{ + int i; + fftw_complex *inout; + inout = A; + for (i = m; i > 0; i = i - 1, inout = inout + dist, W = W + 9) 
{ + fftw_real tmp7; + fftw_real tmp55; + fftw_real tmp100; + fftw_real tmp112; + fftw_real tmp41; + fftw_real tmp52; + fftw_real tmp53; + fftw_real tmp59; + fftw_real tmp60; + fftw_real tmp61; + fftw_real tmp75; + fftw_real tmp78; + fftw_real tmp110; + fftw_real tmp86; + fftw_real tmp87; + fftw_real tmp96; + fftw_real tmp18; + fftw_real tmp29; + fftw_real tmp30; + fftw_real tmp56; + fftw_real tmp57; + fftw_real tmp58; + fftw_real tmp68; + fftw_real tmp71; + fftw_real tmp109; + fftw_real tmp89; + fftw_real tmp90; + fftw_real tmp95; + ASSERT_ALIGNED_DOUBLE; + /* element 0 as is, element 5 multiplied by W[4]; tmp6 and tmp98 are the real and imaginary parts of that product */ { + fftw_real tmp1; + fftw_real tmp99; + fftw_real tmp6; + fftw_real tmp98; + ASSERT_ALIGNED_DOUBLE; + tmp1 = c_re(inout[0]); + tmp99 = c_im(inout[0]); + { + fftw_real tmp3; + fftw_real tmp5; + fftw_real tmp2; + fftw_real tmp4; + ASSERT_ALIGNED_DOUBLE; + tmp3 = c_re(inout[5 * iostride]); + tmp5 = c_im(inout[5 * iostride]); + tmp2 = c_re(W[4]); + tmp4 = c_im(W[4]); + tmp6 = (tmp2 * tmp3) - (tmp4 * tmp5); + tmp98 = (tmp4 * tmp3) + (tmp2 * tmp5); + } + tmp7 = tmp1 - tmp6; + tmp55 = tmp1 + tmp6; + tmp100 = tmp98 + tmp99; + tmp112 = tmp99 - tmp98; + } + /* elements 4, 1, 9, 6 multiplied by W[3], W[0], W[8], W[5], then combined into sums and differences */ { + fftw_real tmp35; + fftw_real tmp73; + fftw_real tmp51; + fftw_real tmp77; + fftw_real tmp40; + fftw_real tmp74; + fftw_real tmp46; + fftw_real tmp76; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp32; + fftw_real tmp34; + fftw_real tmp31; + fftw_real tmp33; + ASSERT_ALIGNED_DOUBLE; + tmp32 = c_re(inout[4 * iostride]); + tmp34 = c_im(inout[4 * iostride]); + tmp31 = c_re(W[3]); + tmp33 = c_im(W[3]); + tmp35 = (tmp31 * tmp32) - (tmp33 * tmp34); + tmp73 = (tmp33 * tmp32) + (tmp31 * tmp34); + } + { + fftw_real tmp48; + fftw_real tmp50; + fftw_real tmp47; + fftw_real tmp49; + ASSERT_ALIGNED_DOUBLE; + tmp48 = c_re(inout[iostride]); + tmp50 = c_im(inout[iostride]); + tmp47 = c_re(W[0]); + tmp49 = c_im(W[0]); + tmp51 = (tmp47 * tmp48) - (tmp49 * tmp50); + tmp77 = (tmp49 * tmp48) + (tmp47 * tmp50); + } + { + fftw_real tmp37; + fftw_real tmp39; + fftw_real tmp36; + fftw_real 
tmp38; + ASSERT_ALIGNED_DOUBLE; + tmp37 = c_re(inout[9 * iostride]); + tmp39 = c_im(inout[9 * iostride]); + tmp36 = c_re(W[8]); + tmp38 = c_im(W[8]); + tmp40 = (tmp36 * tmp37) - (tmp38 * tmp39); + tmp74 = (tmp38 * tmp37) + (tmp36 * tmp39); + } + { + fftw_real tmp43; + fftw_real tmp45; + fftw_real tmp42; + fftw_real tmp44; + ASSERT_ALIGNED_DOUBLE; + tmp43 = c_re(inout[6 * iostride]); + tmp45 = c_im(inout[6 * iostride]); + tmp42 = c_re(W[5]); + tmp44 = c_im(W[5]); + tmp46 = (tmp42 * tmp43) - (tmp44 * tmp45); + tmp76 = (tmp44 * tmp43) + (tmp42 * tmp45); + } + tmp41 = tmp35 - tmp40; + tmp52 = tmp46 - tmp51; + tmp53 = tmp41 + tmp52; + tmp59 = tmp35 + tmp40; + tmp60 = tmp46 + tmp51; + tmp61 = tmp59 + tmp60; + tmp75 = tmp73 - tmp74; + tmp78 = tmp76 - tmp77; + tmp110 = tmp75 + tmp78; + tmp86 = tmp73 + tmp74; + tmp87 = tmp76 + tmp77; + tmp96 = tmp86 + tmp87; + } + /* elements 2, 3, 7, 8 multiplied by W[1], W[2], W[6], W[7], then combined into sums and differences */ { + fftw_real tmp12; + fftw_real tmp66; + fftw_real tmp28; + fftw_real tmp70; + fftw_real tmp17; + fftw_real tmp67; + fftw_real tmp23; + fftw_real tmp69; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp9; + fftw_real tmp11; + fftw_real tmp8; + fftw_real tmp10; + ASSERT_ALIGNED_DOUBLE; + tmp9 = c_re(inout[2 * iostride]); + tmp11 = c_im(inout[2 * iostride]); + tmp8 = c_re(W[1]); + tmp10 = c_im(W[1]); + tmp12 = (tmp8 * tmp9) - (tmp10 * tmp11); + tmp66 = (tmp10 * tmp9) + (tmp8 * tmp11); + } + { + fftw_real tmp25; + fftw_real tmp27; + fftw_real tmp24; + fftw_real tmp26; + ASSERT_ALIGNED_DOUBLE; + tmp25 = c_re(inout[3 * iostride]); + tmp27 = c_im(inout[3 * iostride]); + tmp24 = c_re(W[2]); + tmp26 = c_im(W[2]); + tmp28 = (tmp24 * tmp25) - (tmp26 * tmp27); + tmp70 = (tmp26 * tmp25) + (tmp24 * tmp27); + } + { + fftw_real tmp14; + fftw_real tmp16; + fftw_real tmp13; + fftw_real tmp15; + ASSERT_ALIGNED_DOUBLE; + tmp14 = c_re(inout[7 * iostride]); + tmp16 = c_im(inout[7 * iostride]); + tmp13 = c_re(W[6]); + tmp15 = c_im(W[6]); + tmp17 = (tmp13 * tmp14) - (tmp15 * tmp16); + tmp67 = (tmp15 * tmp14) + (tmp13 * tmp16); + } + { + 
fftw_real tmp20; + fftw_real tmp22; + fftw_real tmp19; + fftw_real tmp21; + ASSERT_ALIGNED_DOUBLE; + tmp20 = c_re(inout[8 * iostride]); + tmp22 = c_im(inout[8 * iostride]); + tmp19 = c_re(W[7]); + tmp21 = c_im(W[7]); + tmp23 = (tmp19 * tmp20) - (tmp21 * tmp22); + tmp69 = (tmp21 * tmp20) + (tmp19 * tmp22); + } + tmp18 = tmp12 - tmp17; + tmp29 = tmp23 - tmp28; + tmp30 = tmp18 + tmp29; + tmp56 = tmp12 + tmp17; + tmp57 = tmp23 + tmp28; + tmp58 = tmp56 + tmp57; + tmp68 = tmp66 - tmp67; + tmp71 = tmp69 - tmp70; + tmp109 = tmp68 + tmp71; + tmp89 = tmp66 + tmp67; + tmp90 = tmp69 + tmp70; + tmp95 = tmp89 + tmp90; + } + /* real parts of the odd-indexed results 5, 7, 3, 9, 1 */ { + fftw_real tmp63; + fftw_real tmp54; + fftw_real tmp64; + fftw_real tmp80; + fftw_real tmp82; + fftw_real tmp72; + fftw_real tmp79; + fftw_real tmp81; + fftw_real tmp65; + ASSERT_ALIGNED_DOUBLE; + tmp63 = K559016994 * (tmp30 - tmp53); + tmp54 = tmp30 + tmp53; + tmp64 = tmp7 - (K250000000 * tmp54); + tmp72 = tmp68 - tmp71; + tmp79 = tmp75 - tmp78; + tmp80 = (K951056516 * tmp72) + (K587785252 * tmp79); + tmp82 = (K951056516 * tmp79) - (K587785252 * tmp72); + c_re(inout[5 * iostride]) = tmp7 + tmp54; + tmp81 = tmp64 - tmp63; + c_re(inout[7 * iostride]) = tmp81 - tmp82; + c_re(inout[3 * iostride]) = tmp81 + tmp82; + tmp65 = tmp63 + tmp64; + c_re(inout[9 * iostride]) = tmp65 - tmp80; + c_re(inout[iostride]) = tmp65 + tmp80; + } + /* imaginary parts of the odd-indexed results 5, 3, 7, 1, 9 */ { + fftw_real tmp111; + fftw_real tmp113; + fftw_real tmp114; + fftw_real tmp118; + fftw_real tmp120; + fftw_real tmp116; + fftw_real tmp117; + fftw_real tmp119; + fftw_real tmp115; + ASSERT_ALIGNED_DOUBLE; + tmp111 = K559016994 * (tmp109 - tmp110); + tmp113 = tmp109 + tmp110; + tmp114 = tmp112 - (K250000000 * tmp113); + tmp116 = tmp18 - tmp29; + tmp117 = tmp41 - tmp52; + tmp118 = (K951056516 * tmp116) + (K587785252 * tmp117); + tmp120 = (K951056516 * tmp117) - (K587785252 * tmp116); + c_im(inout[5 * iostride]) = tmp113 + tmp112; + tmp119 = tmp114 - tmp111; + c_im(inout[3 * iostride]) = tmp119 - tmp120; + c_im(inout[7 * iostride]) = 
tmp120 + tmp119; + tmp115 = tmp111 + tmp114; + c_im(inout[iostride]) = tmp115 - tmp118; + c_im(inout[9 * iostride]) = tmp118 + tmp115; + } + /* real parts of the even-indexed results 0, 4, 6, 2, 8 */ { + fftw_real tmp84; + fftw_real tmp62; + fftw_real tmp83; + fftw_real tmp92; + fftw_real tmp94; + fftw_real tmp88; + fftw_real tmp91; + fftw_real tmp93; + fftw_real tmp85; + ASSERT_ALIGNED_DOUBLE; + tmp84 = K559016994 * (tmp58 - tmp61); + tmp62 = tmp58 + tmp61; + tmp83 = tmp55 - (K250000000 * tmp62); + tmp88 = tmp86 - tmp87; + tmp91 = tmp89 - tmp90; + tmp92 = (K951056516 * tmp88) - (K587785252 * tmp91); + tmp94 = (K951056516 * tmp91) + (K587785252 * tmp88); + c_re(inout[0]) = tmp55 + tmp62; + tmp93 = tmp84 + tmp83; + c_re(inout[4 * iostride]) = tmp93 - tmp94; + c_re(inout[6 * iostride]) = tmp93 + tmp94; + tmp85 = tmp83 - tmp84; + c_re(inout[2 * iostride]) = tmp85 - tmp92; + c_re(inout[8 * iostride]) = tmp85 + tmp92; + } + /* imaginary parts of the even-indexed results 0, 4, 6, 2, 8 */ { + fftw_real tmp105; + fftw_real tmp97; + fftw_real tmp104; + fftw_real tmp103; + fftw_real tmp107; + fftw_real tmp101; + fftw_real tmp102; + fftw_real tmp108; + fftw_real tmp106; + ASSERT_ALIGNED_DOUBLE; + tmp105 = K559016994 * (tmp95 - tmp96); + tmp97 = tmp95 + tmp96; + tmp104 = tmp100 - (K250000000 * tmp97); + tmp101 = tmp59 - tmp60; + tmp102 = tmp56 - tmp57; + tmp103 = (K951056516 * tmp101) - (K587785252 * tmp102); + tmp107 = (K951056516 * tmp102) + (K587785252 * tmp101); + c_im(inout[0]) = tmp97 + tmp100; + tmp108 = tmp105 + tmp104; + c_im(inout[4 * iostride]) = tmp107 + tmp108; + c_im(inout[6 * iostride]) = tmp108 - tmp107; + tmp106 = tmp104 - tmp105; + c_im(inout[2 * iostride]) = tmp103 + tmp106; + c_im(inout[8 * iostride]) = tmp106 - tmp103; + } + } +} + +/* Nine-entry table 1..9, referenced as the last field of the descriptor below. */ static const int twiddle_order[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9 }; +/* Descriptor for the codelet above: name string, function pointer, size 10, FFTW_FORWARD, FFTW_TWIDDLE, then 220, 9 and twiddle_order. The 9 matches both the table length and the per-pass advance of W in the function; the exact field meanings are set by the fftw_codelet_desc declaration, which is not visible here (presumably in fftw-int.h; confirm there). */ fftw_codelet_desc fftw_twiddle_10_desc = { + "fftw_twiddle_10", + (void (*)()) fftw_twiddle_10, + 10, + FFTW_FORWARD, + FFTW_TWIDDLE, + 220, + 9, + twiddle_order, +}; diff --git a/src/fftw/ftw_16.c b/src/fftw/ftw_16.c new file mode 100644 index 0000000..1df4281 --- /dev/null +++ 
b/src/fftw/ftw_16.c @@ -0,0 +1,614 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Mon Mar 24 02:07:43 EST 2003 */ + +#include "fftw-int.h" +#include "fftw.h" + +/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -twiddle 16 */ + +/* + * This function contains 174 FP additions, 84 FP multiplications, + * (or, 136 additions, 46 multiplications, 38 fused multiply/add), + * 50 stack variables, and 64 memory accesses + */ +static const fftw_real K382683432 = +FFTW_KONST(+0.382683432365089771728459984030398866761344562); +static const fftw_real K923879532 = +FFTW_KONST(+0.923879532511286756128183189396788286822416626); +static const fftw_real K707106781 = +FFTW_KONST(+0.707106781186547524400844362104849039284835938); + +/* + * Generator Id's : + * $Id: ftw_16.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: ftw_16.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: ftw_16.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + */ + +void fftw_twiddle_16(fftw_complex *A, const fftw_complex *W, int iostride, + int m, int dist) +{ + int i; + fftw_complex *inout; + inout = A; + 
for (i = m; i > 0; i = i - 1, inout = inout + dist, W = W + 15) { + fftw_real tmp7; + fftw_real tmp91; + fftw_real tmp180; + fftw_real tmp193; + fftw_real tmp18; + fftw_real tmp194; + fftw_real tmp94; + fftw_real tmp177; + fftw_real tmp77; + fftw_real tmp88; + fftw_real tmp161; + fftw_real tmp128; + fftw_real tmp144; + fftw_real tmp162; + fftw_real tmp163; + fftw_real tmp164; + fftw_real tmp123; + fftw_real tmp143; + fftw_real tmp30; + fftw_real tmp152; + fftw_real tmp100; + fftw_real tmp136; + fftw_real tmp41; + fftw_real tmp153; + fftw_real tmp105; + fftw_real tmp137; + fftw_real tmp54; + fftw_real tmp65; + fftw_real tmp156; + fftw_real tmp117; + fftw_real tmp141; + fftw_real tmp157; + fftw_real tmp158; + fftw_real tmp159; + fftw_real tmp112; + fftw_real tmp140; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp1; + fftw_real tmp179; + fftw_real tmp6; + fftw_real tmp178; + ASSERT_ALIGNED_DOUBLE; + tmp1 = c_re(inout[0]); + tmp179 = c_im(inout[0]); + { + fftw_real tmp3; + fftw_real tmp5; + fftw_real tmp2; + fftw_real tmp4; + ASSERT_ALIGNED_DOUBLE; + tmp3 = c_re(inout[8 * iostride]); + tmp5 = c_im(inout[8 * iostride]); + tmp2 = c_re(W[7]); + tmp4 = c_im(W[7]); + tmp6 = (tmp2 * tmp3) - (tmp4 * tmp5); + tmp178 = (tmp4 * tmp3) + (tmp2 * tmp5); + } + tmp7 = tmp1 + tmp6; + tmp91 = tmp1 - tmp6; + tmp180 = tmp178 + tmp179; + tmp193 = tmp179 - tmp178; + } + { + fftw_real tmp12; + fftw_real tmp92; + fftw_real tmp17; + fftw_real tmp93; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp9; + fftw_real tmp11; + fftw_real tmp8; + fftw_real tmp10; + ASSERT_ALIGNED_DOUBLE; + tmp9 = c_re(inout[4 * iostride]); + tmp11 = c_im(inout[4 * iostride]); + tmp8 = c_re(W[3]); + tmp10 = c_im(W[3]); + tmp12 = (tmp8 * tmp9) - (tmp10 * tmp11); + tmp92 = (tmp10 * tmp9) + (tmp8 * tmp11); + } + { + fftw_real tmp14; + fftw_real tmp16; + fftw_real tmp13; + fftw_real tmp15; + ASSERT_ALIGNED_DOUBLE; + tmp14 = c_re(inout[12 * iostride]); + tmp16 = c_im(inout[12 * iostride]); + tmp13 = c_re(W[11]); + tmp15 = 
c_im(W[11]); + tmp17 = (tmp13 * tmp14) - (tmp15 * tmp16); + tmp93 = (tmp15 * tmp14) + (tmp13 * tmp16); + } + tmp18 = tmp12 + tmp17; + tmp194 = tmp12 - tmp17; + tmp94 = tmp92 - tmp93; + tmp177 = tmp92 + tmp93; + } + { + fftw_real tmp71; + fftw_real tmp124; + fftw_real tmp87; + fftw_real tmp121; + fftw_real tmp76; + fftw_real tmp125; + fftw_real tmp82; + fftw_real tmp120; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp68; + fftw_real tmp70; + fftw_real tmp67; + fftw_real tmp69; + ASSERT_ALIGNED_DOUBLE; + tmp68 = c_re(inout[15 * iostride]); + tmp70 = c_im(inout[15 * iostride]); + tmp67 = c_re(W[14]); + tmp69 = c_im(W[14]); + tmp71 = (tmp67 * tmp68) - (tmp69 * tmp70); + tmp124 = (tmp69 * tmp68) + (tmp67 * tmp70); + } + { + fftw_real tmp84; + fftw_real tmp86; + fftw_real tmp83; + fftw_real tmp85; + ASSERT_ALIGNED_DOUBLE; + tmp84 = c_re(inout[11 * iostride]); + tmp86 = c_im(inout[11 * iostride]); + tmp83 = c_re(W[10]); + tmp85 = c_im(W[10]); + tmp87 = (tmp83 * tmp84) - (tmp85 * tmp86); + tmp121 = (tmp85 * tmp84) + (tmp83 * tmp86); + } + { + fftw_real tmp73; + fftw_real tmp75; + fftw_real tmp72; + fftw_real tmp74; + ASSERT_ALIGNED_DOUBLE; + tmp73 = c_re(inout[7 * iostride]); + tmp75 = c_im(inout[7 * iostride]); + tmp72 = c_re(W[6]); + tmp74 = c_im(W[6]); + tmp76 = (tmp72 * tmp73) - (tmp74 * tmp75); + tmp125 = (tmp74 * tmp73) + (tmp72 * tmp75); + } + { + fftw_real tmp79; + fftw_real tmp81; + fftw_real tmp78; + fftw_real tmp80; + ASSERT_ALIGNED_DOUBLE; + tmp79 = c_re(inout[3 * iostride]); + tmp81 = c_im(inout[3 * iostride]); + tmp78 = c_re(W[2]); + tmp80 = c_im(W[2]); + tmp82 = (tmp78 * tmp79) - (tmp80 * tmp81); + tmp120 = (tmp80 * tmp79) + (tmp78 * tmp81); + } + { + fftw_real tmp126; + fftw_real tmp127; + fftw_real tmp119; + fftw_real tmp122; + ASSERT_ALIGNED_DOUBLE; + tmp77 = tmp71 + tmp76; + tmp88 = tmp82 + tmp87; + tmp161 = tmp77 - tmp88; + tmp126 = tmp124 - tmp125; + tmp127 = tmp82 - tmp87; + tmp128 = tmp126 + tmp127; + tmp144 = tmp126 - tmp127; + tmp162 = tmp124 + 
tmp125; + tmp163 = tmp120 + tmp121; + tmp164 = tmp162 - tmp163; + tmp119 = tmp71 - tmp76; + tmp122 = tmp120 - tmp121; + tmp123 = tmp119 - tmp122; + tmp143 = tmp119 + tmp122; + } + } + { + fftw_real tmp24; + fftw_real tmp96; + fftw_real tmp29; + fftw_real tmp97; + fftw_real tmp98; + fftw_real tmp99; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp21; + fftw_real tmp23; + fftw_real tmp20; + fftw_real tmp22; + ASSERT_ALIGNED_DOUBLE; + tmp21 = c_re(inout[2 * iostride]); + tmp23 = c_im(inout[2 * iostride]); + tmp20 = c_re(W[1]); + tmp22 = c_im(W[1]); + tmp24 = (tmp20 * tmp21) - (tmp22 * tmp23); + tmp96 = (tmp22 * tmp21) + (tmp20 * tmp23); + } + { + fftw_real tmp26; + fftw_real tmp28; + fftw_real tmp25; + fftw_real tmp27; + ASSERT_ALIGNED_DOUBLE; + tmp26 = c_re(inout[10 * iostride]); + tmp28 = c_im(inout[10 * iostride]); + tmp25 = c_re(W[9]); + tmp27 = c_im(W[9]); + tmp29 = (tmp25 * tmp26) - (tmp27 * tmp28); + tmp97 = (tmp27 * tmp26) + (tmp25 * tmp28); + } + tmp30 = tmp24 + tmp29; + tmp152 = tmp96 + tmp97; + tmp98 = tmp96 - tmp97; + tmp99 = tmp24 - tmp29; + tmp100 = tmp98 - tmp99; + tmp136 = tmp99 + tmp98; + } + { + fftw_real tmp35; + fftw_real tmp102; + fftw_real tmp40; + fftw_real tmp103; + fftw_real tmp101; + fftw_real tmp104; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp32; + fftw_real tmp34; + fftw_real tmp31; + fftw_real tmp33; + ASSERT_ALIGNED_DOUBLE; + tmp32 = c_re(inout[14 * iostride]); + tmp34 = c_im(inout[14 * iostride]); + tmp31 = c_re(W[13]); + tmp33 = c_im(W[13]); + tmp35 = (tmp31 * tmp32) - (tmp33 * tmp34); + tmp102 = (tmp33 * tmp32) + (tmp31 * tmp34); + } + { + fftw_real tmp37; + fftw_real tmp39; + fftw_real tmp36; + fftw_real tmp38; + ASSERT_ALIGNED_DOUBLE; + tmp37 = c_re(inout[6 * iostride]); + tmp39 = c_im(inout[6 * iostride]); + tmp36 = c_re(W[5]); + tmp38 = c_im(W[5]); + tmp40 = (tmp36 * tmp37) - (tmp38 * tmp39); + tmp103 = (tmp38 * tmp37) + (tmp36 * tmp39); + } + tmp41 = tmp35 + tmp40; + tmp153 = tmp102 + tmp103; + tmp101 = tmp35 - tmp40; + tmp104 = 
tmp102 - tmp103; + tmp105 = tmp101 + tmp104; + tmp137 = tmp101 - tmp104; + } + { + fftw_real tmp48; + fftw_real tmp108; + fftw_real tmp64; + fftw_real tmp115; + fftw_real tmp53; + fftw_real tmp109; + fftw_real tmp59; + fftw_real tmp114; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp45; + fftw_real tmp47; + fftw_real tmp44; + fftw_real tmp46; + ASSERT_ALIGNED_DOUBLE; + tmp45 = c_re(inout[iostride]); + tmp47 = c_im(inout[iostride]); + tmp44 = c_re(W[0]); + tmp46 = c_im(W[0]); + tmp48 = (tmp44 * tmp45) - (tmp46 * tmp47); + tmp108 = (tmp46 * tmp45) + (tmp44 * tmp47); + } + { + fftw_real tmp61; + fftw_real tmp63; + fftw_real tmp60; + fftw_real tmp62; + ASSERT_ALIGNED_DOUBLE; + tmp61 = c_re(inout[13 * iostride]); + tmp63 = c_im(inout[13 * iostride]); + tmp60 = c_re(W[12]); + tmp62 = c_im(W[12]); + tmp64 = (tmp60 * tmp61) - (tmp62 * tmp63); + tmp115 = (tmp62 * tmp61) + (tmp60 * tmp63); + } + { + fftw_real tmp50; + fftw_real tmp52; + fftw_real tmp49; + fftw_real tmp51; + ASSERT_ALIGNED_DOUBLE; + tmp50 = c_re(inout[9 * iostride]); + tmp52 = c_im(inout[9 * iostride]); + tmp49 = c_re(W[8]); + tmp51 = c_im(W[8]); + tmp53 = (tmp49 * tmp50) - (tmp51 * tmp52); + tmp109 = (tmp51 * tmp50) + (tmp49 * tmp52); + } + { + fftw_real tmp56; + fftw_real tmp58; + fftw_real tmp55; + fftw_real tmp57; + ASSERT_ALIGNED_DOUBLE; + tmp56 = c_re(inout[5 * iostride]); + tmp58 = c_im(inout[5 * iostride]); + tmp55 = c_re(W[4]); + tmp57 = c_im(W[4]); + tmp59 = (tmp55 * tmp56) - (tmp57 * tmp58); + tmp114 = (tmp57 * tmp56) + (tmp55 * tmp58); + } + { + fftw_real tmp113; + fftw_real tmp116; + fftw_real tmp110; + fftw_real tmp111; + ASSERT_ALIGNED_DOUBLE; + tmp54 = tmp48 + tmp53; + tmp65 = tmp59 + tmp64; + tmp156 = tmp54 - tmp65; + tmp113 = tmp48 - tmp53; + tmp116 = tmp114 - tmp115; + tmp117 = tmp113 - tmp116; + tmp141 = tmp113 + tmp116; + tmp157 = tmp108 + tmp109; + tmp158 = tmp114 + tmp115; + tmp159 = tmp157 - tmp158; + tmp110 = tmp108 - tmp109; + tmp111 = tmp59 - tmp64; + tmp112 = tmp110 + tmp111; + tmp140 
= tmp110 - tmp111; + } + } + { + fftw_real tmp107; + fftw_real tmp131; + fftw_real tmp202; + fftw_real tmp204; + fftw_real tmp130; + fftw_real tmp203; + fftw_real tmp134; + fftw_real tmp199; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp95; + fftw_real tmp106; + fftw_real tmp200; + fftw_real tmp201; + ASSERT_ALIGNED_DOUBLE; + tmp95 = tmp91 - tmp94; + tmp106 = K707106781 * (tmp100 - tmp105); + tmp107 = tmp95 + tmp106; + tmp131 = tmp95 - tmp106; + tmp200 = K707106781 * (tmp137 - tmp136); + tmp201 = tmp194 + tmp193; + tmp202 = tmp200 + tmp201; + tmp204 = tmp201 - tmp200; + } + { + fftw_real tmp118; + fftw_real tmp129; + fftw_real tmp132; + fftw_real tmp133; + ASSERT_ALIGNED_DOUBLE; + tmp118 = (K923879532 * tmp112) + (K382683432 * tmp117); + tmp129 = (K382683432 * tmp123) - (K923879532 * tmp128); + tmp130 = tmp118 + tmp129; + tmp203 = tmp129 - tmp118; + tmp132 = (K382683432 * tmp112) - (K923879532 * tmp117); + tmp133 = (K382683432 * tmp128) + (K923879532 * tmp123); + tmp134 = tmp132 - tmp133; + tmp199 = tmp132 + tmp133; + } + c_re(inout[11 * iostride]) = tmp107 - tmp130; + c_re(inout[3 * iostride]) = tmp107 + tmp130; + c_re(inout[15 * iostride]) = tmp131 - tmp134; + c_re(inout[7 * iostride]) = tmp131 + tmp134; + c_im(inout[3 * iostride]) = tmp199 + tmp202; + c_im(inout[11 * iostride]) = tmp202 - tmp199; + c_im(inout[7 * iostride]) = tmp203 + tmp204; + c_im(inout[15 * iostride]) = tmp204 - tmp203; + } + { + fftw_real tmp139; + fftw_real tmp147; + fftw_real tmp196; + fftw_real tmp198; + fftw_real tmp146; + fftw_real tmp197; + fftw_real tmp150; + fftw_real tmp191; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp135; + fftw_real tmp138; + fftw_real tmp192; + fftw_real tmp195; + ASSERT_ALIGNED_DOUBLE; + tmp135 = tmp91 + tmp94; + tmp138 = K707106781 * (tmp136 + tmp137); + tmp139 = tmp135 + tmp138; + tmp147 = tmp135 - tmp138; + tmp192 = K707106781 * (tmp100 + tmp105); + tmp195 = tmp193 - tmp194; + tmp196 = tmp192 + tmp195; + tmp198 = tmp195 - tmp192; + } + { + fftw_real tmp142; + 
fftw_real tmp145; + fftw_real tmp148; + fftw_real tmp149; + ASSERT_ALIGNED_DOUBLE; + tmp142 = (K382683432 * tmp140) + (K923879532 * tmp141); + tmp145 = (K923879532 * tmp143) - (K382683432 * tmp144); + tmp146 = tmp142 + tmp145; + tmp197 = tmp145 - tmp142; + tmp148 = (K923879532 * tmp140) - (K382683432 * tmp141); + tmp149 = (K923879532 * tmp144) + (K382683432 * tmp143); + tmp150 = tmp148 - tmp149; + tmp191 = tmp148 + tmp149; + } + c_re(inout[9 * iostride]) = tmp139 - tmp146; + c_re(inout[iostride]) = tmp139 + tmp146; + c_re(inout[13 * iostride]) = tmp147 - tmp150; + c_re(inout[5 * iostride]) = tmp147 + tmp150; + c_im(inout[iostride]) = tmp191 + tmp196; + c_im(inout[9 * iostride]) = tmp196 - tmp191; + c_im(inout[5 * iostride]) = tmp197 + tmp198; + c_im(inout[13 * iostride]) = tmp198 - tmp197; + } + { + fftw_real tmp155; + fftw_real tmp167; + fftw_real tmp188; + fftw_real tmp190; + fftw_real tmp166; + fftw_real tmp189; + fftw_real tmp170; + fftw_real tmp185; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp151; + fftw_real tmp154; + fftw_real tmp186; + fftw_real tmp187; + ASSERT_ALIGNED_DOUBLE; + tmp151 = tmp7 - tmp18; + tmp154 = tmp152 - tmp153; + tmp155 = tmp151 + tmp154; + tmp167 = tmp151 - tmp154; + tmp186 = tmp41 - tmp30; + tmp187 = tmp180 - tmp177; + tmp188 = tmp186 + tmp187; + tmp190 = tmp187 - tmp186; + } + { + fftw_real tmp160; + fftw_real tmp165; + fftw_real tmp168; + fftw_real tmp169; + ASSERT_ALIGNED_DOUBLE; + tmp160 = tmp156 + tmp159; + tmp165 = tmp161 - tmp164; + tmp166 = K707106781 * (tmp160 + tmp165); + tmp189 = K707106781 * (tmp165 - tmp160); + tmp168 = tmp159 - tmp156; + tmp169 = tmp161 + tmp164; + tmp170 = K707106781 * (tmp168 - tmp169); + tmp185 = K707106781 * (tmp168 + tmp169); + } + c_re(inout[10 * iostride]) = tmp155 - tmp166; + c_re(inout[2 * iostride]) = tmp155 + tmp166; + c_re(inout[14 * iostride]) = tmp167 - tmp170; + c_re(inout[6 * iostride]) = tmp167 + tmp170; + c_im(inout[2 * iostride]) = tmp185 + tmp188; + c_im(inout[10 * iostride]) = tmp188 - 
tmp185; + c_im(inout[6 * iostride]) = tmp189 + tmp190; + c_im(inout[14 * iostride]) = tmp190 - tmp189; + } + { + fftw_real tmp43; + fftw_real tmp171; + fftw_real tmp182; + fftw_real tmp184; + fftw_real tmp90; + fftw_real tmp183; + fftw_real tmp174; + fftw_real tmp175; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp19; + fftw_real tmp42; + fftw_real tmp176; + fftw_real tmp181; + ASSERT_ALIGNED_DOUBLE; + tmp19 = tmp7 + tmp18; + tmp42 = tmp30 + tmp41; + tmp43 = tmp19 + tmp42; + tmp171 = tmp19 - tmp42; + tmp176 = tmp152 + tmp153; + tmp181 = tmp177 + tmp180; + tmp182 = tmp176 + tmp181; + tmp184 = tmp181 - tmp176; + } + { + fftw_real tmp66; + fftw_real tmp89; + fftw_real tmp172; + fftw_real tmp173; + ASSERT_ALIGNED_DOUBLE; + tmp66 = tmp54 + tmp65; + tmp89 = tmp77 + tmp88; + tmp90 = tmp66 + tmp89; + tmp183 = tmp89 - tmp66; + tmp172 = tmp157 + tmp158; + tmp173 = tmp162 + tmp163; + tmp174 = tmp172 - tmp173; + tmp175 = tmp172 + tmp173; + } + c_re(inout[8 * iostride]) = tmp43 - tmp90; + c_re(inout[0]) = tmp43 + tmp90; + c_re(inout[12 * iostride]) = tmp171 - tmp174; + c_re(inout[4 * iostride]) = tmp171 + tmp174; + c_im(inout[0]) = tmp175 + tmp182; + c_im(inout[8 * iostride]) = tmp182 - tmp175; + c_im(inout[4 * iostride]) = tmp183 + tmp184; + c_im(inout[12 * iostride]) = tmp184 - tmp183; + } + } +} + +static const int twiddle_order[] = + { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; +fftw_codelet_desc fftw_twiddle_16_desc = { + "fftw_twiddle_16", + (void (*)()) fftw_twiddle_16, + 16, + FFTW_FORWARD, + FFTW_TWIDDLE, + 352, + 15, + twiddle_order, +}; diff --git a/src/fftw/ftw_2.c b/src/fftw/ftw_2.c new file mode 100644 index 0000000..3c82074 --- /dev/null +++ b/src/fftw/ftw_2.c @@ -0,0 +1,85 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the 
License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Mon Mar 24 02:07:31 EST 2003 */ + +#include "fftw-int.h" +#include "fftw.h" + +/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -twiddle 2 */ + +/* + * This function contains 6 FP additions, 4 FP multiplications, + * (or, 4 additions, 2 multiplications, 2 fused multiply/add), + * 10 stack variables, and 8 memory accesses + */ + +/* + * Generator Id's : + * $Id: ftw_2.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: ftw_2.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: ftw_2.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + */ + /* + * fftw_twiddle_2: in-place 2-point butterfly preceded by one complex + * multiplication, repeated m times. + * + * A: first element of the data; it is updated in place through inout. + * W: complex factors; W[0] is read in each iteration and W then + * advances by 1. + * iostride: element distance between the two points of one butterfly. + * m: number of butterflies; nothing is done when m is not positive. + * dist: element distance between the starts of consecutive butterflies. + * + * Each iteration forms t = W[0] times inout[iostride] (a complex product, + * assuming c_re and c_im select the real and imaginary parts; they are + * defined outside this file) and stores inout[0] + t into inout[0] and + * inout[0] - t into inout[iostride]. + */ +void fftw_twiddle_2(fftw_complex *A, const fftw_complex *W, int iostride, + int m, int dist) +{ + int i; + fftw_complex *inout; + inout = A; + for (i = m; i > 0; i = i - 1, inout = inout + dist, W = W + 1) { + fftw_real tmp1; + fftw_real tmp8; + fftw_real tmp6; + fftw_real tmp7; + ASSERT_ALIGNED_DOUBLE; + /* first point of the butterfly */ tmp1 = c_re(inout[0]); + tmp8 = c_im(inout[0]); + { + fftw_real tmp3; + fftw_real tmp5; + fftw_real tmp2; + fftw_real tmp4; + ASSERT_ALIGNED_DOUBLE; + /* second point and the factor applied to it */ tmp3 = c_re(inout[iostride]); + tmp5 = c_im(inout[iostride]); + tmp2 = c_re(W[0]); + tmp4 = c_im(W[0]); + /* tmp6, tmp7: real and imaginary part of W[0] times inout[iostride] */ tmp6 = (tmp2 * tmp3) - (tmp4 * tmp5); + tmp7 = (tmp4 * tmp3) + (tmp2 * tmp5); + } + /* difference goes to the second point, sum to the first */ c_re(inout[iostride]) = tmp1 - tmp6; + c_re(inout[0]) = tmp1 + tmp6; + c_im(inout[0]) = tmp7 + tmp8; + c_im(inout[iostride]) = 
tmp8 - tmp7; + } +} + /* + * Descriptor for fftw_twiddle_2. The initializer supplies, in order: the + * codelet name, the function pointer, 2, FFTW_FORWARD, FFTW_TWIDDLE, 44, 1 + * and the twiddle_order table. The field meanings come from + * fftw_codelet_desc, which is declared outside this file; presumably 2 is + * the transform size and 1 the length of twiddle_order -- TODO confirm. + */ +static const int twiddle_order[] = { 1 }; +fftw_codelet_desc fftw_twiddle_2_desc = { + "fftw_twiddle_2", + (void (*)()) fftw_twiddle_2, + 2, + FFTW_FORWARD, + FFTW_TWIDDLE, + 44, + 1, + twiddle_order, +}; diff --git a/src/fftw/ftw_3.c b/src/fftw/ftw_3.c new file mode 100644 index 0000000..63ea592 --- /dev/null +++ b/src/fftw/ftw_3.c @@ -0,0 +1,121 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Mon Mar 24 02:07:31 EST 2003 */ + +#include "fftw-int.h" +#include "fftw.h" + +/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -twiddle 3 */ + +/* + * This function contains 16 FP additions, 12 FP multiplications, + * (or, 10 additions, 6 multiplications, 6 fused multiply/add), + * 14 stack variables, and 12 memory accesses + */ /* + * Constants used by fftw_twiddle_3: K866025403 is sqrt(3)/2 and + * K500000000 is 1/2, both written as long decimal literals wrapped in + * FFTW_KONST (a macro defined outside this file). + */ +static const fftw_real K866025403 = +FFTW_KONST(+0.866025403784438646763723170752936183471402627); +static const fftw_real K500000000 = +FFTW_KONST(+0.500000000000000000000000000000000000000000000); + +/* + * Generator Id's : + * $Id: ftw_3.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: ftw_3.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: ftw_3.c,v 1.1 
2008/10/17 06:13:18 scuri Exp $ + * $Id: ftw_3.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + */ + /* + * fftw_twiddle_3: in-place 3-point butterfly preceded by two complex + * multiplications, repeated m times. + * + * A: first element of the data; it is updated in place through inout. + * W: complex factors; W[0] and W[1] are read in each iteration and + * W then advances by 2. + * iostride: element distance between the three points of one butterfly. + * m: number of butterflies; nothing is done when m is not positive. + * dist: element distance between the starts of consecutive butterflies. + * + * With x0 = inout[0], t1 = W[0] times inout[iostride], + * t2 = W[1] times inout[2 * iostride], s = t1 + t2 and d = t1 - t2 + * (complex values, assuming c_re and c_im select the real and imaginary + * parts; they are defined outside this file), each iteration stores + * inout[0] = x0 + s + * inout[iostride] = x0 - K500000000 s - i K866025403 d + * inout[2 * iostride] = x0 - K500000000 s + i K866025403 d + * where K500000000 and K866025403 are file-scope constants. + */ +void fftw_twiddle_3(fftw_complex *A, const fftw_complex *W, int iostride, + int m, int dist) +{ + int i; + fftw_complex *inout; + inout = A; + for (i = m; i > 0; i = i - 1, inout = inout + dist, W = W + 2) { + fftw_real tmp1; + fftw_real tmp18; + fftw_real tmp6; + fftw_real tmp14; + fftw_real tmp11; + fftw_real tmp15; + fftw_real tmp12; + fftw_real tmp17; + ASSERT_ALIGNED_DOUBLE; + /* point 0 */ tmp1 = c_re(inout[0]); + tmp18 = c_im(inout[0]); + { + fftw_real tmp3; + fftw_real tmp5; + fftw_real tmp2; + fftw_real tmp4; + ASSERT_ALIGNED_DOUBLE; + /* point 1 times W[0]: tmp6 is the real part, tmp14 the imaginary part */ tmp3 = c_re(inout[iostride]); + tmp5 = c_im(inout[iostride]); + tmp2 = c_re(W[0]); + tmp4 = c_im(W[0]); + tmp6 = (tmp2 * tmp3) - (tmp4 * tmp5); + tmp14 = (tmp4 * tmp3) + (tmp2 * tmp5); + } + { + fftw_real tmp8; + fftw_real tmp10; + fftw_real tmp7; + fftw_real tmp9; + ASSERT_ALIGNED_DOUBLE; + /* point 2 times W[1]: tmp11 is the real part, tmp15 the imaginary part */ tmp8 = c_re(inout[2 * iostride]); + tmp10 = c_im(inout[2 * iostride]); + tmp7 = c_re(W[1]); + tmp9 = c_im(W[1]); + tmp11 = (tmp7 * tmp8) - (tmp9 * tmp10); + tmp15 = (tmp9 * tmp8) + (tmp7 * tmp10); + } + /* real and imaginary part of the sum of the two multiplied points */ tmp12 = tmp6 + tmp11; + tmp17 = tmp14 + tmp15; + { + fftw_real tmp13; + fftw_real tmp16; + fftw_real tmp19; + fftw_real tmp20; + ASSERT_ALIGNED_DOUBLE; + /* real parts of the three outputs */ c_re(inout[0]) = tmp1 + tmp12; + tmp13 = tmp1 - (K500000000 * tmp12); + tmp16 = K866025403 * (tmp14 - tmp15); + c_re(inout[2 * iostride]) = tmp13 - tmp16; + c_re(inout[iostride]) = tmp13 + tmp16; + /* imaginary parts of the three outputs */ c_im(inout[0]) = tmp17 + tmp18; + tmp19 = K866025403 * (tmp11 - tmp6); + tmp20 = tmp18 - (K500000000 * tmp17); + c_im(inout[iostride]) = tmp19 + tmp20; + c_im(inout[2 * iostride]) = tmp20 - tmp19; + } + } +} + /* + * Descriptor for fftw_twiddle_3. The initializer supplies, in order: the + * codelet name, the function pointer, 3, FFTW_FORWARD, FFTW_TWIDDLE, 66, 2 + * and the twiddle_order table. The field meanings come from + * fftw_codelet_desc, which is declared outside this file; presumably 3 is + * the transform size and 2 the length of twiddle_order -- TODO confirm. + */ +static const int twiddle_order[] = { 1, 2 }; +fftw_codelet_desc fftw_twiddle_3_desc = { + "fftw_twiddle_3", + (void (*)()) fftw_twiddle_3, + 3, + FFTW_FORWARD, + FFTW_TWIDDLE, + 66, + 2, + twiddle_order, +}; diff --git a/src/fftw/ftw_32.c b/src/fftw/ftw_32.c new file mode 100644 index 0000000..8fa7e39 --- 
/dev/null +++ b/src/fftw/ftw_32.c @@ -0,0 +1,1398 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Mon Mar 24 02:07:45 EST 2003 */ + +#include "fftw-int.h" +#include "fftw.h" + +/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -twiddle 32 */ + +/* + * This function contains 434 FP additions, 208 FP multiplications, + * (or, 340 additions, 114 multiplications, 94 fused multiply/add), + * 90 stack variables, and 128 memory accesses + */ +static const fftw_real K195090322 = +FFTW_KONST(+0.195090322016128267848284868477022240927691618); +static const fftw_real K980785280 = +FFTW_KONST(+0.980785280403230449126182236134239036973933731); +static const fftw_real K831469612 = +FFTW_KONST(+0.831469612302545237078788377617905756738560812); +static const fftw_real K555570233 = +FFTW_KONST(+0.555570233019602224742830813948532874374937191); +static const fftw_real K382683432 = +FFTW_KONST(+0.382683432365089771728459984030398866761344562); +static const fftw_real K923879532 = +FFTW_KONST(+0.923879532511286756128183189396788286822416626); +static const fftw_real 
K707106781 = +FFTW_KONST(+0.707106781186547524400844362104849039284835938); + +/* + * Generator Id's : + * $Id: ftw_32.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: ftw_32.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: ftw_32.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + */ + +void fftw_twiddle_32(fftw_complex *A, const fftw_complex *W, int iostride, + int m, int dist) +{ + int i; + fftw_complex *inout; + inout = A; + for (i = m; i > 0; i = i - 1, inout = inout + dist, W = W + 31) { + fftw_real tmp19; + fftw_real tmp351; + fftw_real tmp472; + fftw_real tmp486; + fftw_real tmp442; + fftw_real tmp456; + fftw_real tmp191; + fftw_real tmp303; + fftw_real tmp161; + fftw_real tmp379; + fftw_real tmp276; + fftw_real tmp326; + fftw_real tmp386; + fftw_real tmp422; + fftw_real tmp259; + fftw_real tmp323; + fftw_real tmp42; + fftw_real tmp455; + fftw_real tmp201; + fftw_real tmp305; + fftw_real tmp354; + fftw_real tmp437; + fftw_real tmp196; + fftw_real tmp304; + fftw_real tmp184; + fftw_real tmp387; + fftw_real tmp382; + fftw_real tmp423; + fftw_real tmp270; + fftw_real tmp327; + fftw_real tmp279; + fftw_real tmp324; + fftw_real tmp66; + fftw_real tmp359; + fftw_real tmp213; + fftw_real tmp309; + fftw_real tmp358; + fftw_real tmp412; + fftw_real tmp208; + fftw_real tmp308; + fftw_real tmp114; + fftw_real tmp373; + fftw_real tmp249; + fftw_real tmp316; + fftw_real tmp370; + fftw_real tmp417; + fftw_real tmp232; + fftw_real tmp319; + fftw_real tmp89; + fftw_real tmp361; + fftw_real tmp224; + fftw_real tmp312; + fftw_real tmp364; + fftw_real tmp413; + fftw_real tmp219; + fftw_real tmp311; + fftw_real tmp137; + fftw_real tmp371; + fftw_real tmp376; + fftw_real tmp418; + fftw_real tmp243; + fftw_real tmp317; + fftw_real tmp252; + fftw_real tmp320; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp1; + fftw_real tmp440; + fftw_real tmp6; + fftw_real tmp439; + fftw_real tmp12; + fftw_real tmp188; + fftw_real tmp17; + fftw_real tmp189; + ASSERT_ALIGNED_DOUBLE; + tmp1 = c_re(inout[0]); + 
tmp440 = c_im(inout[0]); + { + fftw_real tmp3; + fftw_real tmp5; + fftw_real tmp2; + fftw_real tmp4; + ASSERT_ALIGNED_DOUBLE; + tmp3 = c_re(inout[16 * iostride]); + tmp5 = c_im(inout[16 * iostride]); + tmp2 = c_re(W[15]); + tmp4 = c_im(W[15]); + tmp6 = (tmp2 * tmp3) - (tmp4 * tmp5); + tmp439 = (tmp4 * tmp3) + (tmp2 * tmp5); + } + { + fftw_real tmp9; + fftw_real tmp11; + fftw_real tmp8; + fftw_real tmp10; + ASSERT_ALIGNED_DOUBLE; + tmp9 = c_re(inout[8 * iostride]); + tmp11 = c_im(inout[8 * iostride]); + tmp8 = c_re(W[7]); + tmp10 = c_im(W[7]); + tmp12 = (tmp8 * tmp9) - (tmp10 * tmp11); + tmp188 = (tmp10 * tmp9) + (tmp8 * tmp11); + } + { + fftw_real tmp14; + fftw_real tmp16; + fftw_real tmp13; + fftw_real tmp15; + ASSERT_ALIGNED_DOUBLE; + tmp14 = c_re(inout[24 * iostride]); + tmp16 = c_im(inout[24 * iostride]); + tmp13 = c_re(W[23]); + tmp15 = c_im(W[23]); + tmp17 = (tmp13 * tmp14) - (tmp15 * tmp16); + tmp189 = (tmp15 * tmp14) + (tmp13 * tmp16); + } + { + fftw_real tmp7; + fftw_real tmp18; + fftw_real tmp470; + fftw_real tmp471; + ASSERT_ALIGNED_DOUBLE; + tmp7 = tmp1 + tmp6; + tmp18 = tmp12 + tmp17; + tmp19 = tmp7 + tmp18; + tmp351 = tmp7 - tmp18; + tmp470 = tmp440 - tmp439; + tmp471 = tmp12 - tmp17; + tmp472 = tmp470 - tmp471; + tmp486 = tmp471 + tmp470; + } + { + fftw_real tmp438; + fftw_real tmp441; + fftw_real tmp187; + fftw_real tmp190; + ASSERT_ALIGNED_DOUBLE; + tmp438 = tmp188 + tmp189; + tmp441 = tmp439 + tmp440; + tmp442 = tmp438 + tmp441; + tmp456 = tmp441 - tmp438; + tmp187 = tmp1 - tmp6; + tmp190 = tmp188 - tmp189; + tmp191 = tmp187 - tmp190; + tmp303 = tmp187 + tmp190; + } + } + { + fftw_real tmp143; + fftw_real tmp272; + fftw_real tmp159; + fftw_real tmp257; + fftw_real tmp148; + fftw_real tmp273; + fftw_real tmp154; + fftw_real tmp256; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp140; + fftw_real tmp142; + fftw_real tmp139; + fftw_real tmp141; + ASSERT_ALIGNED_DOUBLE; + tmp140 = c_re(inout[31 * iostride]); + tmp142 = c_im(inout[31 * iostride]); + tmp139 
= c_re(W[30]); + tmp141 = c_im(W[30]); + tmp143 = (tmp139 * tmp140) - (tmp141 * tmp142); + tmp272 = (tmp141 * tmp140) + (tmp139 * tmp142); + } + { + fftw_real tmp156; + fftw_real tmp158; + fftw_real tmp155; + fftw_real tmp157; + ASSERT_ALIGNED_DOUBLE; + tmp156 = c_re(inout[23 * iostride]); + tmp158 = c_im(inout[23 * iostride]); + tmp155 = c_re(W[22]); + tmp157 = c_im(W[22]); + tmp159 = (tmp155 * tmp156) - (tmp157 * tmp158); + tmp257 = (tmp157 * tmp156) + (tmp155 * tmp158); + } + { + fftw_real tmp145; + fftw_real tmp147; + fftw_real tmp144; + fftw_real tmp146; + ASSERT_ALIGNED_DOUBLE; + tmp145 = c_re(inout[15 * iostride]); + tmp147 = c_im(inout[15 * iostride]); + tmp144 = c_re(W[14]); + tmp146 = c_im(W[14]); + tmp148 = (tmp144 * tmp145) - (tmp146 * tmp147); + tmp273 = (tmp146 * tmp145) + (tmp144 * tmp147); + } + { + fftw_real tmp151; + fftw_real tmp153; + fftw_real tmp150; + fftw_real tmp152; + ASSERT_ALIGNED_DOUBLE; + tmp151 = c_re(inout[7 * iostride]); + tmp153 = c_im(inout[7 * iostride]); + tmp150 = c_re(W[6]); + tmp152 = c_im(W[6]); + tmp154 = (tmp150 * tmp151) - (tmp152 * tmp153); + tmp256 = (tmp152 * tmp151) + (tmp150 * tmp153); + } + { + fftw_real tmp149; + fftw_real tmp160; + fftw_real tmp274; + fftw_real tmp275; + ASSERT_ALIGNED_DOUBLE; + tmp149 = tmp143 + tmp148; + tmp160 = tmp154 + tmp159; + tmp161 = tmp149 + tmp160; + tmp379 = tmp149 - tmp160; + tmp274 = tmp272 - tmp273; + tmp275 = tmp154 - tmp159; + tmp276 = tmp274 + tmp275; + tmp326 = tmp274 - tmp275; + } + { + fftw_real tmp384; + fftw_real tmp385; + fftw_real tmp255; + fftw_real tmp258; + ASSERT_ALIGNED_DOUBLE; + tmp384 = tmp272 + tmp273; + tmp385 = tmp256 + tmp257; + tmp386 = tmp384 - tmp385; + tmp422 = tmp384 + tmp385; + tmp255 = tmp143 - tmp148; + tmp258 = tmp256 - tmp257; + tmp259 = tmp255 - tmp258; + tmp323 = tmp255 + tmp258; + } + } + { + fftw_real tmp24; + fftw_real tmp192; + fftw_real tmp40; + fftw_real tmp199; + fftw_real tmp29; + fftw_real tmp193; + fftw_real tmp35; + fftw_real tmp198; + 
ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp21; + fftw_real tmp23; + fftw_real tmp20; + fftw_real tmp22; + ASSERT_ALIGNED_DOUBLE; + tmp21 = c_re(inout[4 * iostride]); + tmp23 = c_im(inout[4 * iostride]); + tmp20 = c_re(W[3]); + tmp22 = c_im(W[3]); + tmp24 = (tmp20 * tmp21) - (tmp22 * tmp23); + tmp192 = (tmp22 * tmp21) + (tmp20 * tmp23); + } + { + fftw_real tmp37; + fftw_real tmp39; + fftw_real tmp36; + fftw_real tmp38; + ASSERT_ALIGNED_DOUBLE; + tmp37 = c_re(inout[12 * iostride]); + tmp39 = c_im(inout[12 * iostride]); + tmp36 = c_re(W[11]); + tmp38 = c_im(W[11]); + tmp40 = (tmp36 * tmp37) - (tmp38 * tmp39); + tmp199 = (tmp38 * tmp37) + (tmp36 * tmp39); + } + { + fftw_real tmp26; + fftw_real tmp28; + fftw_real tmp25; + fftw_real tmp27; + ASSERT_ALIGNED_DOUBLE; + tmp26 = c_re(inout[20 * iostride]); + tmp28 = c_im(inout[20 * iostride]); + tmp25 = c_re(W[19]); + tmp27 = c_im(W[19]); + tmp29 = (tmp25 * tmp26) - (tmp27 * tmp28); + tmp193 = (tmp27 * tmp26) + (tmp25 * tmp28); + } + { + fftw_real tmp32; + fftw_real tmp34; + fftw_real tmp31; + fftw_real tmp33; + ASSERT_ALIGNED_DOUBLE; + tmp32 = c_re(inout[28 * iostride]); + tmp34 = c_im(inout[28 * iostride]); + tmp31 = c_re(W[27]); + tmp33 = c_im(W[27]); + tmp35 = (tmp31 * tmp32) - (tmp33 * tmp34); + tmp198 = (tmp33 * tmp32) + (tmp31 * tmp34); + } + { + fftw_real tmp30; + fftw_real tmp41; + fftw_real tmp197; + fftw_real tmp200; + ASSERT_ALIGNED_DOUBLE; + tmp30 = tmp24 + tmp29; + tmp41 = tmp35 + tmp40; + tmp42 = tmp30 + tmp41; + tmp455 = tmp41 - tmp30; + tmp197 = tmp35 - tmp40; + tmp200 = tmp198 - tmp199; + tmp201 = tmp197 + tmp200; + tmp305 = tmp197 - tmp200; + } + { + fftw_real tmp352; + fftw_real tmp353; + fftw_real tmp194; + fftw_real tmp195; + ASSERT_ALIGNED_DOUBLE; + tmp352 = tmp192 + tmp193; + tmp353 = tmp198 + tmp199; + tmp354 = tmp352 - tmp353; + tmp437 = tmp352 + tmp353; + tmp194 = tmp192 - tmp193; + tmp195 = tmp24 - tmp29; + tmp196 = tmp194 - tmp195; + tmp304 = tmp195 + tmp194; + } + } + { + fftw_real tmp166; + 
fftw_real tmp260; + fftw_real tmp171; + fftw_real tmp261; + fftw_real tmp262; + fftw_real tmp263; + fftw_real tmp177; + fftw_real tmp266; + fftw_real tmp182; + fftw_real tmp267; + fftw_real tmp265; + fftw_real tmp268; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp163; + fftw_real tmp165; + fftw_real tmp162; + fftw_real tmp164; + ASSERT_ALIGNED_DOUBLE; + tmp163 = c_re(inout[3 * iostride]); + tmp165 = c_im(inout[3 * iostride]); + tmp162 = c_re(W[2]); + tmp164 = c_im(W[2]); + tmp166 = (tmp162 * tmp163) - (tmp164 * tmp165); + tmp260 = (tmp164 * tmp163) + (tmp162 * tmp165); + } + { + fftw_real tmp168; + fftw_real tmp170; + fftw_real tmp167; + fftw_real tmp169; + ASSERT_ALIGNED_DOUBLE; + tmp168 = c_re(inout[19 * iostride]); + tmp170 = c_im(inout[19 * iostride]); + tmp167 = c_re(W[18]); + tmp169 = c_im(W[18]); + tmp171 = (tmp167 * tmp168) - (tmp169 * tmp170); + tmp261 = (tmp169 * tmp168) + (tmp167 * tmp170); + } + tmp262 = tmp260 - tmp261; + tmp263 = tmp166 - tmp171; + { + fftw_real tmp174; + fftw_real tmp176; + fftw_real tmp173; + fftw_real tmp175; + ASSERT_ALIGNED_DOUBLE; + tmp174 = c_re(inout[27 * iostride]); + tmp176 = c_im(inout[27 * iostride]); + tmp173 = c_re(W[26]); + tmp175 = c_im(W[26]); + tmp177 = (tmp173 * tmp174) - (tmp175 * tmp176); + tmp266 = (tmp175 * tmp174) + (tmp173 * tmp176); + } + { + fftw_real tmp179; + fftw_real tmp181; + fftw_real tmp178; + fftw_real tmp180; + ASSERT_ALIGNED_DOUBLE; + tmp179 = c_re(inout[11 * iostride]); + tmp181 = c_im(inout[11 * iostride]); + tmp178 = c_re(W[10]); + tmp180 = c_im(W[10]); + tmp182 = (tmp178 * tmp179) - (tmp180 * tmp181); + tmp267 = (tmp180 * tmp179) + (tmp178 * tmp181); + } + tmp265 = tmp177 - tmp182; + tmp268 = tmp266 - tmp267; + { + fftw_real tmp172; + fftw_real tmp183; + fftw_real tmp380; + fftw_real tmp381; + ASSERT_ALIGNED_DOUBLE; + tmp172 = tmp166 + tmp171; + tmp183 = tmp177 + tmp182; + tmp184 = tmp172 + tmp183; + tmp387 = tmp183 - tmp172; + tmp380 = tmp260 + tmp261; + tmp381 = tmp266 + tmp267; + tmp382 = 
tmp380 - tmp381; + tmp423 = tmp380 + tmp381; + } + { + fftw_real tmp264; + fftw_real tmp269; + fftw_real tmp277; + fftw_real tmp278; + ASSERT_ALIGNED_DOUBLE; + tmp264 = tmp262 - tmp263; + tmp269 = tmp265 + tmp268; + tmp270 = K707106781 * (tmp264 - tmp269); + tmp327 = K707106781 * (tmp264 + tmp269); + tmp277 = tmp265 - tmp268; + tmp278 = tmp263 + tmp262; + tmp279 = K707106781 * (tmp277 - tmp278); + tmp324 = K707106781 * (tmp278 + tmp277); + } + } + { + fftw_real tmp48; + fftw_real tmp204; + fftw_real tmp64; + fftw_real tmp211; + fftw_real tmp53; + fftw_real tmp205; + fftw_real tmp59; + fftw_real tmp210; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp45; + fftw_real tmp47; + fftw_real tmp44; + fftw_real tmp46; + ASSERT_ALIGNED_DOUBLE; + tmp45 = c_re(inout[2 * iostride]); + tmp47 = c_im(inout[2 * iostride]); + tmp44 = c_re(W[1]); + tmp46 = c_im(W[1]); + tmp48 = (tmp44 * tmp45) - (tmp46 * tmp47); + tmp204 = (tmp46 * tmp45) + (tmp44 * tmp47); + } + { + fftw_real tmp61; + fftw_real tmp63; + fftw_real tmp60; + fftw_real tmp62; + ASSERT_ALIGNED_DOUBLE; + tmp61 = c_re(inout[26 * iostride]); + tmp63 = c_im(inout[26 * iostride]); + tmp60 = c_re(W[25]); + tmp62 = c_im(W[25]); + tmp64 = (tmp60 * tmp61) - (tmp62 * tmp63); + tmp211 = (tmp62 * tmp61) + (tmp60 * tmp63); + } + { + fftw_real tmp50; + fftw_real tmp52; + fftw_real tmp49; + fftw_real tmp51; + ASSERT_ALIGNED_DOUBLE; + tmp50 = c_re(inout[18 * iostride]); + tmp52 = c_im(inout[18 * iostride]); + tmp49 = c_re(W[17]); + tmp51 = c_im(W[17]); + tmp53 = (tmp49 * tmp50) - (tmp51 * tmp52); + tmp205 = (tmp51 * tmp50) + (tmp49 * tmp52); + } + { + fftw_real tmp56; + fftw_real tmp58; + fftw_real tmp55; + fftw_real tmp57; + ASSERT_ALIGNED_DOUBLE; + tmp56 = c_re(inout[10 * iostride]); + tmp58 = c_im(inout[10 * iostride]); + tmp55 = c_re(W[9]); + tmp57 = c_im(W[9]); + tmp59 = (tmp55 * tmp56) - (tmp57 * tmp58); + tmp210 = (tmp57 * tmp56) + (tmp55 * tmp58); + } + { + fftw_real tmp54; + fftw_real tmp65; + fftw_real tmp209; + fftw_real tmp212; 
+ ASSERT_ALIGNED_DOUBLE; + tmp54 = tmp48 + tmp53; + tmp65 = tmp59 + tmp64; + tmp66 = tmp54 + tmp65; + tmp359 = tmp54 - tmp65; + tmp209 = tmp48 - tmp53; + tmp212 = tmp210 - tmp211; + tmp213 = tmp209 - tmp212; + tmp309 = tmp209 + tmp212; + } + { + fftw_real tmp356; + fftw_real tmp357; + fftw_real tmp206; + fftw_real tmp207; + ASSERT_ALIGNED_DOUBLE; + tmp356 = tmp204 + tmp205; + tmp357 = tmp210 + tmp211; + tmp358 = tmp356 - tmp357; + tmp412 = tmp356 + tmp357; + tmp206 = tmp204 - tmp205; + tmp207 = tmp59 - tmp64; + tmp208 = tmp206 + tmp207; + tmp308 = tmp206 - tmp207; + } + } + { + fftw_real tmp96; + fftw_real tmp228; + fftw_real tmp112; + fftw_real tmp247; + fftw_real tmp101; + fftw_real tmp229; + fftw_real tmp107; + fftw_real tmp246; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp93; + fftw_real tmp95; + fftw_real tmp92; + fftw_real tmp94; + ASSERT_ALIGNED_DOUBLE; + tmp93 = c_re(inout[iostride]); + tmp95 = c_im(inout[iostride]); + tmp92 = c_re(W[0]); + tmp94 = c_im(W[0]); + tmp96 = (tmp92 * tmp93) - (tmp94 * tmp95); + tmp228 = (tmp94 * tmp93) + (tmp92 * tmp95); + } + { + fftw_real tmp109; + fftw_real tmp111; + fftw_real tmp108; + fftw_real tmp110; + ASSERT_ALIGNED_DOUBLE; + tmp109 = c_re(inout[25 * iostride]); + tmp111 = c_im(inout[25 * iostride]); + tmp108 = c_re(W[24]); + tmp110 = c_im(W[24]); + tmp112 = (tmp108 * tmp109) - (tmp110 * tmp111); + tmp247 = (tmp110 * tmp109) + (tmp108 * tmp111); + } + { + fftw_real tmp98; + fftw_real tmp100; + fftw_real tmp97; + fftw_real tmp99; + ASSERT_ALIGNED_DOUBLE; + tmp98 = c_re(inout[17 * iostride]); + tmp100 = c_im(inout[17 * iostride]); + tmp97 = c_re(W[16]); + tmp99 = c_im(W[16]); + tmp101 = (tmp97 * tmp98) - (tmp99 * tmp100); + tmp229 = (tmp99 * tmp98) + (tmp97 * tmp100); + } + { + fftw_real tmp104; + fftw_real tmp106; + fftw_real tmp103; + fftw_real tmp105; + ASSERT_ALIGNED_DOUBLE; + tmp104 = c_re(inout[9 * iostride]); + tmp106 = c_im(inout[9 * iostride]); + tmp103 = c_re(W[8]); + tmp105 = c_im(W[8]); + tmp107 = (tmp103 * 
tmp104) - (tmp105 * tmp106); + tmp246 = (tmp105 * tmp104) + (tmp103 * tmp106); + } + { + fftw_real tmp102; + fftw_real tmp113; + fftw_real tmp245; + fftw_real tmp248; + ASSERT_ALIGNED_DOUBLE; + tmp102 = tmp96 + tmp101; + tmp113 = tmp107 + tmp112; + tmp114 = tmp102 + tmp113; + tmp373 = tmp102 - tmp113; + tmp245 = tmp96 - tmp101; + tmp248 = tmp246 - tmp247; + tmp249 = tmp245 - tmp248; + tmp316 = tmp245 + tmp248; + } + { + fftw_real tmp368; + fftw_real tmp369; + fftw_real tmp230; + fftw_real tmp231; + ASSERT_ALIGNED_DOUBLE; + tmp368 = tmp228 + tmp229; + tmp369 = tmp246 + tmp247; + tmp370 = tmp368 - tmp369; + tmp417 = tmp368 + tmp369; + tmp230 = tmp228 - tmp229; + tmp231 = tmp107 - tmp112; + tmp232 = tmp230 + tmp231; + tmp319 = tmp230 - tmp231; + } + } + { + fftw_real tmp71; + fftw_real tmp215; + fftw_real tmp87; + fftw_real tmp222; + fftw_real tmp76; + fftw_real tmp216; + fftw_real tmp82; + fftw_real tmp221; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp68; + fftw_real tmp70; + fftw_real tmp67; + fftw_real tmp69; + ASSERT_ALIGNED_DOUBLE; + tmp68 = c_re(inout[30 * iostride]); + tmp70 = c_im(inout[30 * iostride]); + tmp67 = c_re(W[29]); + tmp69 = c_im(W[29]); + tmp71 = (tmp67 * tmp68) - (tmp69 * tmp70); + tmp215 = (tmp69 * tmp68) + (tmp67 * tmp70); + } + { + fftw_real tmp84; + fftw_real tmp86; + fftw_real tmp83; + fftw_real tmp85; + ASSERT_ALIGNED_DOUBLE; + tmp84 = c_re(inout[22 * iostride]); + tmp86 = c_im(inout[22 * iostride]); + tmp83 = c_re(W[21]); + tmp85 = c_im(W[21]); + tmp87 = (tmp83 * tmp84) - (tmp85 * tmp86); + tmp222 = (tmp85 * tmp84) + (tmp83 * tmp86); + } + { + fftw_real tmp73; + fftw_real tmp75; + fftw_real tmp72; + fftw_real tmp74; + ASSERT_ALIGNED_DOUBLE; + tmp73 = c_re(inout[14 * iostride]); + tmp75 = c_im(inout[14 * iostride]); + tmp72 = c_re(W[13]); + tmp74 = c_im(W[13]); + tmp76 = (tmp72 * tmp73) - (tmp74 * tmp75); + tmp216 = (tmp74 * tmp73) + (tmp72 * tmp75); + } + { + fftw_real tmp79; + fftw_real tmp81; + fftw_real tmp78; + fftw_real tmp80; + 
ASSERT_ALIGNED_DOUBLE; + tmp79 = c_re(inout[6 * iostride]); + tmp81 = c_im(inout[6 * iostride]); + tmp78 = c_re(W[5]); + tmp80 = c_im(W[5]); + tmp82 = (tmp78 * tmp79) - (tmp80 * tmp81); + tmp221 = (tmp80 * tmp79) + (tmp78 * tmp81); + } + { + fftw_real tmp77; + fftw_real tmp88; + fftw_real tmp220; + fftw_real tmp223; + ASSERT_ALIGNED_DOUBLE; + tmp77 = tmp71 + tmp76; + tmp88 = tmp82 + tmp87; + tmp89 = tmp77 + tmp88; + tmp361 = tmp77 - tmp88; + tmp220 = tmp71 - tmp76; + tmp223 = tmp221 - tmp222; + tmp224 = tmp220 - tmp223; + tmp312 = tmp220 + tmp223; + } + { + fftw_real tmp362; + fftw_real tmp363; + fftw_real tmp217; + fftw_real tmp218; + ASSERT_ALIGNED_DOUBLE; + tmp362 = tmp215 + tmp216; + tmp363 = tmp221 + tmp222; + tmp364 = tmp362 - tmp363; + tmp413 = tmp362 + tmp363; + tmp217 = tmp215 - tmp216; + tmp218 = tmp82 - tmp87; + tmp219 = tmp217 + tmp218; + tmp311 = tmp217 - tmp218; + } + } + { + fftw_real tmp119; + fftw_real tmp239; + fftw_real tmp124; + fftw_real tmp240; + fftw_real tmp238; + fftw_real tmp241; + fftw_real tmp130; + fftw_real tmp234; + fftw_real tmp135; + fftw_real tmp235; + fftw_real tmp233; + fftw_real tmp236; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp116; + fftw_real tmp118; + fftw_real tmp115; + fftw_real tmp117; + ASSERT_ALIGNED_DOUBLE; + tmp116 = c_re(inout[5 * iostride]); + tmp118 = c_im(inout[5 * iostride]); + tmp115 = c_re(W[4]); + tmp117 = c_im(W[4]); + tmp119 = (tmp115 * tmp116) - (tmp117 * tmp118); + tmp239 = (tmp117 * tmp116) + (tmp115 * tmp118); + } + { + fftw_real tmp121; + fftw_real tmp123; + fftw_real tmp120; + fftw_real tmp122; + ASSERT_ALIGNED_DOUBLE; + tmp121 = c_re(inout[21 * iostride]); + tmp123 = c_im(inout[21 * iostride]); + tmp120 = c_re(W[20]); + tmp122 = c_im(W[20]); + tmp124 = (tmp120 * tmp121) - (tmp122 * tmp123); + tmp240 = (tmp122 * tmp121) + (tmp120 * tmp123); + } + tmp238 = tmp119 - tmp124; + tmp241 = tmp239 - tmp240; + { + fftw_real tmp127; + fftw_real tmp129; + fftw_real tmp126; + fftw_real tmp128; + 
ASSERT_ALIGNED_DOUBLE; + tmp127 = c_re(inout[29 * iostride]); + tmp129 = c_im(inout[29 * iostride]); + tmp126 = c_re(W[28]); + tmp128 = c_im(W[28]); + tmp130 = (tmp126 * tmp127) - (tmp128 * tmp129); + tmp234 = (tmp128 * tmp127) + (tmp126 * tmp129); + } + { + fftw_real tmp132; + fftw_real tmp134; + fftw_real tmp131; + fftw_real tmp133; + ASSERT_ALIGNED_DOUBLE; + tmp132 = c_re(inout[13 * iostride]); + tmp134 = c_im(inout[13 * iostride]); + tmp131 = c_re(W[12]); + tmp133 = c_im(W[12]); + tmp135 = (tmp131 * tmp132) - (tmp133 * tmp134); + tmp235 = (tmp133 * tmp132) + (tmp131 * tmp134); + } + tmp233 = tmp130 - tmp135; + tmp236 = tmp234 - tmp235; + { + fftw_real tmp125; + fftw_real tmp136; + fftw_real tmp374; + fftw_real tmp375; + ASSERT_ALIGNED_DOUBLE; + tmp125 = tmp119 + tmp124; + tmp136 = tmp130 + tmp135; + tmp137 = tmp125 + tmp136; + tmp371 = tmp136 - tmp125; + tmp374 = tmp239 + tmp240; + tmp375 = tmp234 + tmp235; + tmp376 = tmp374 - tmp375; + tmp418 = tmp374 + tmp375; + } + { + fftw_real tmp237; + fftw_real tmp242; + fftw_real tmp250; + fftw_real tmp251; + ASSERT_ALIGNED_DOUBLE; + tmp237 = tmp233 - tmp236; + tmp242 = tmp238 + tmp241; + tmp243 = K707106781 * (tmp237 - tmp242); + tmp317 = K707106781 * (tmp242 + tmp237); + tmp250 = tmp241 - tmp238; + tmp251 = tmp233 + tmp236; + tmp252 = K707106781 * (tmp250 - tmp251); + tmp320 = K707106781 * (tmp250 + tmp251); + } + } + { + fftw_real tmp91; + fftw_real tmp431; + fftw_real tmp444; + fftw_real tmp446; + fftw_real tmp186; + fftw_real tmp445; + fftw_real tmp434; + fftw_real tmp435; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp43; + fftw_real tmp90; + fftw_real tmp436; + fftw_real tmp443; + ASSERT_ALIGNED_DOUBLE; + tmp43 = tmp19 + tmp42; + tmp90 = tmp66 + tmp89; + tmp91 = tmp43 + tmp90; + tmp431 = tmp43 - tmp90; + tmp436 = tmp412 + tmp413; + tmp443 = tmp437 + tmp442; + tmp444 = tmp436 + tmp443; + tmp446 = tmp443 - tmp436; + } + { + fftw_real tmp138; + fftw_real tmp185; + fftw_real tmp432; + fftw_real tmp433; + 
ASSERT_ALIGNED_DOUBLE; + tmp138 = tmp114 + tmp137; + tmp185 = tmp161 + tmp184; + tmp186 = tmp138 + tmp185; + tmp445 = tmp185 - tmp138; + tmp432 = tmp417 + tmp418; + tmp433 = tmp422 + tmp423; + tmp434 = tmp432 - tmp433; + tmp435 = tmp432 + tmp433; + } + c_re(inout[16 * iostride]) = tmp91 - tmp186; + c_re(inout[0]) = tmp91 + tmp186; + c_re(inout[24 * iostride]) = tmp431 - tmp434; + c_re(inout[8 * iostride]) = tmp431 + tmp434; + c_im(inout[0]) = tmp435 + tmp444; + c_im(inout[16 * iostride]) = tmp444 - tmp435; + c_im(inout[8 * iostride]) = tmp445 + tmp446; + c_im(inout[24 * iostride]) = tmp446 - tmp445; + } + { + fftw_real tmp415; + fftw_real tmp427; + fftw_real tmp450; + fftw_real tmp452; + fftw_real tmp420; + fftw_real tmp428; + fftw_real tmp425; + fftw_real tmp429; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp411; + fftw_real tmp414; + fftw_real tmp448; + fftw_real tmp449; + ASSERT_ALIGNED_DOUBLE; + tmp411 = tmp19 - tmp42; + tmp414 = tmp412 - tmp413; + tmp415 = tmp411 + tmp414; + tmp427 = tmp411 - tmp414; + tmp448 = tmp89 - tmp66; + tmp449 = tmp442 - tmp437; + tmp450 = tmp448 + tmp449; + tmp452 = tmp449 - tmp448; + } + { + fftw_real tmp416; + fftw_real tmp419; + fftw_real tmp421; + fftw_real tmp424; + ASSERT_ALIGNED_DOUBLE; + tmp416 = tmp114 - tmp137; + tmp419 = tmp417 - tmp418; + tmp420 = tmp416 + tmp419; + tmp428 = tmp419 - tmp416; + tmp421 = tmp161 - tmp184; + tmp424 = tmp422 - tmp423; + tmp425 = tmp421 - tmp424; + tmp429 = tmp421 + tmp424; + } + { + fftw_real tmp426; + fftw_real tmp451; + fftw_real tmp430; + fftw_real tmp447; + ASSERT_ALIGNED_DOUBLE; + tmp426 = K707106781 * (tmp420 + tmp425); + c_re(inout[20 * iostride]) = tmp415 - tmp426; + c_re(inout[4 * iostride]) = tmp415 + tmp426; + tmp451 = K707106781 * (tmp425 - tmp420); + c_im(inout[12 * iostride]) = tmp451 + tmp452; + c_im(inout[28 * iostride]) = tmp452 - tmp451; + tmp430 = K707106781 * (tmp428 - tmp429); + c_re(inout[28 * iostride]) = tmp427 - tmp430; + c_re(inout[12 * iostride]) = tmp427 + tmp430; + 
tmp447 = K707106781 * (tmp428 + tmp429); + c_im(inout[4 * iostride]) = tmp447 + tmp450; + c_im(inout[20 * iostride]) = tmp450 - tmp447; + } + } + { + fftw_real tmp355; + fftw_real tmp395; + fftw_real tmp366; + fftw_real tmp454; + fftw_real tmp398; + fftw_real tmp462; + fftw_real tmp378; + fftw_real tmp392; + fftw_real tmp457; + fftw_real tmp463; + fftw_real tmp402; + fftw_real tmp408; + fftw_real tmp389; + fftw_real tmp393; + fftw_real tmp405; + fftw_real tmp409; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp360; + fftw_real tmp365; + fftw_real tmp400; + fftw_real tmp401; + ASSERT_ALIGNED_DOUBLE; + tmp355 = tmp351 - tmp354; + tmp395 = tmp351 + tmp354; + tmp360 = tmp358 - tmp359; + tmp365 = tmp361 + tmp364; + tmp366 = K707106781 * (tmp360 - tmp365); + tmp454 = K707106781 * (tmp360 + tmp365); + { + fftw_real tmp396; + fftw_real tmp397; + fftw_real tmp372; + fftw_real tmp377; + ASSERT_ALIGNED_DOUBLE; + tmp396 = tmp359 + tmp358; + tmp397 = tmp361 - tmp364; + tmp398 = K707106781 * (tmp396 + tmp397); + tmp462 = K707106781 * (tmp397 - tmp396); + tmp372 = tmp370 - tmp371; + tmp377 = tmp373 - tmp376; + tmp378 = + (K923879532 * tmp372) + (K382683432 * tmp377); + tmp392 = + (K382683432 * tmp372) - (K923879532 * tmp377); + } + tmp457 = tmp455 + tmp456; + tmp463 = tmp456 - tmp455; + tmp400 = tmp370 + tmp371; + tmp401 = tmp373 + tmp376; + tmp402 = (K382683432 * tmp400) + (K923879532 * tmp401); + tmp408 = (K923879532 * tmp400) - (K382683432 * tmp401); + { + fftw_real tmp383; + fftw_real tmp388; + fftw_real tmp403; + fftw_real tmp404; + ASSERT_ALIGNED_DOUBLE; + tmp383 = tmp379 - tmp382; + tmp388 = tmp386 - tmp387; + tmp389 = + (K382683432 * tmp383) - (K923879532 * tmp388); + tmp393 = + (K382683432 * tmp388) + (K923879532 * tmp383); + tmp403 = tmp379 + tmp382; + tmp404 = tmp386 + tmp387; + tmp405 = + (K923879532 * tmp403) - (K382683432 * tmp404); + tmp409 = + (K923879532 * tmp404) + (K382683432 * tmp403); + } + } + { + fftw_real tmp367; + fftw_real tmp390; + fftw_real tmp391; + 
fftw_real tmp394; + ASSERT_ALIGNED_DOUBLE; + tmp367 = tmp355 + tmp366; + tmp390 = tmp378 + tmp389; + c_re(inout[22 * iostride]) = tmp367 - tmp390; + c_re(inout[6 * iostride]) = tmp367 + tmp390; + tmp391 = tmp355 - tmp366; + tmp394 = tmp392 - tmp393; + c_re(inout[30 * iostride]) = tmp391 - tmp394; + c_re(inout[14 * iostride]) = tmp391 + tmp394; + } + { + fftw_real tmp461; + fftw_real tmp464; + fftw_real tmp465; + fftw_real tmp466; + ASSERT_ALIGNED_DOUBLE; + tmp461 = tmp392 + tmp393; + tmp464 = tmp462 + tmp463; + c_im(inout[6 * iostride]) = tmp461 + tmp464; + c_im(inout[22 * iostride]) = tmp464 - tmp461; + tmp465 = tmp389 - tmp378; + tmp466 = tmp463 - tmp462; + c_im(inout[14 * iostride]) = tmp465 + tmp466; + c_im(inout[30 * iostride]) = tmp466 - tmp465; + } + { + fftw_real tmp399; + fftw_real tmp406; + fftw_real tmp407; + fftw_real tmp410; + ASSERT_ALIGNED_DOUBLE; + tmp399 = tmp395 + tmp398; + tmp406 = tmp402 + tmp405; + c_re(inout[18 * iostride]) = tmp399 - tmp406; + c_re(inout[2 * iostride]) = tmp399 + tmp406; + tmp407 = tmp395 - tmp398; + tmp410 = tmp408 - tmp409; + c_re(inout[26 * iostride]) = tmp407 - tmp410; + c_re(inout[10 * iostride]) = tmp407 + tmp410; + } + { + fftw_real tmp453; + fftw_real tmp458; + fftw_real tmp459; + fftw_real tmp460; + ASSERT_ALIGNED_DOUBLE; + tmp453 = tmp408 + tmp409; + tmp458 = tmp454 + tmp457; + c_im(inout[2 * iostride]) = tmp453 + tmp458; + c_im(inout[18 * iostride]) = tmp458 - tmp453; + tmp459 = tmp405 - tmp402; + tmp460 = tmp457 - tmp454; + c_im(inout[10 * iostride]) = tmp459 + tmp460; + c_im(inout[26 * iostride]) = tmp460 - tmp459; + } + } + { + fftw_real tmp307; + fftw_real tmp335; + fftw_real tmp338; + fftw_real tmp478; + fftw_real tmp473; + fftw_real tmp479; + fftw_real tmp314; + fftw_real tmp468; + fftw_real tmp322; + fftw_real tmp332; + fftw_real tmp342; + fftw_real tmp348; + fftw_real tmp329; + fftw_real tmp333; + fftw_real tmp345; + fftw_real tmp349; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp306; + fftw_real tmp336; + 
fftw_real tmp337; + fftw_real tmp469; + fftw_real tmp310; + fftw_real tmp313; + ASSERT_ALIGNED_DOUBLE; + tmp306 = K707106781 * (tmp304 + tmp305); + tmp307 = tmp303 - tmp306; + tmp335 = tmp303 + tmp306; + tmp336 = (K382683432 * tmp308) + (K923879532 * tmp309); + tmp337 = (K923879532 * tmp312) - (K382683432 * tmp311); + tmp338 = tmp336 + tmp337; + tmp478 = tmp337 - tmp336; + tmp469 = K707106781 * (tmp196 + tmp201); + tmp473 = tmp469 + tmp472; + tmp479 = tmp472 - tmp469; + tmp310 = (K923879532 * tmp308) - (K382683432 * tmp309); + tmp313 = (K923879532 * tmp311) + (K382683432 * tmp312); + tmp314 = tmp310 - tmp313; + tmp468 = tmp310 + tmp313; + } + { + fftw_real tmp318; + fftw_real tmp321; + fftw_real tmp340; + fftw_real tmp341; + ASSERT_ALIGNED_DOUBLE; + tmp318 = tmp316 - tmp317; + tmp321 = tmp319 - tmp320; + tmp322 = (K555570233 * tmp318) + (K831469612 * tmp321); + tmp332 = (K555570233 * tmp321) - (K831469612 * tmp318); + tmp340 = tmp316 + tmp317; + tmp341 = tmp319 + tmp320; + tmp342 = (K980785280 * tmp340) + (K195090322 * tmp341); + tmp348 = (K980785280 * tmp341) - (K195090322 * tmp340); + } + { + fftw_real tmp325; + fftw_real tmp328; + fftw_real tmp343; + fftw_real tmp344; + ASSERT_ALIGNED_DOUBLE; + tmp325 = tmp323 - tmp324; + tmp328 = tmp326 - tmp327; + tmp329 = (K555570233 * tmp325) - (K831469612 * tmp328); + tmp333 = (K831469612 * tmp325) + (K555570233 * tmp328); + tmp343 = tmp323 + tmp324; + tmp344 = tmp326 + tmp327; + tmp345 = (K980785280 * tmp343) - (K195090322 * tmp344); + tmp349 = (K195090322 * tmp343) + (K980785280 * tmp344); + } + { + fftw_real tmp315; + fftw_real tmp330; + fftw_real tmp331; + fftw_real tmp334; + ASSERT_ALIGNED_DOUBLE; + tmp315 = tmp307 + tmp314; + tmp330 = tmp322 + tmp329; + c_re(inout[21 * iostride]) = tmp315 - tmp330; + c_re(inout[5 * iostride]) = tmp315 + tmp330; + tmp331 = tmp307 - tmp314; + tmp334 = tmp332 - tmp333; + c_re(inout[29 * iostride]) = tmp331 - tmp334; + c_re(inout[13 * iostride]) = tmp331 + tmp334; + } + { + fftw_real 
tmp477; + fftw_real tmp480; + fftw_real tmp481; + fftw_real tmp482; + ASSERT_ALIGNED_DOUBLE; + tmp477 = tmp332 + tmp333; + tmp480 = tmp478 + tmp479; + c_im(inout[5 * iostride]) = tmp477 + tmp480; + c_im(inout[21 * iostride]) = tmp480 - tmp477; + tmp481 = tmp329 - tmp322; + tmp482 = tmp479 - tmp478; + c_im(inout[13 * iostride]) = tmp481 + tmp482; + c_im(inout[29 * iostride]) = tmp482 - tmp481; + } + { + fftw_real tmp339; + fftw_real tmp346; + fftw_real tmp347; + fftw_real tmp350; + ASSERT_ALIGNED_DOUBLE; + tmp339 = tmp335 + tmp338; + tmp346 = tmp342 + tmp345; + c_re(inout[17 * iostride]) = tmp339 - tmp346; + c_re(inout[iostride]) = tmp339 + tmp346; + tmp347 = tmp335 - tmp338; + tmp350 = tmp348 - tmp349; + c_re(inout[25 * iostride]) = tmp347 - tmp350; + c_re(inout[9 * iostride]) = tmp347 + tmp350; + } + { + fftw_real tmp467; + fftw_real tmp474; + fftw_real tmp475; + fftw_real tmp476; + ASSERT_ALIGNED_DOUBLE; + tmp467 = tmp348 + tmp349; + tmp474 = tmp468 + tmp473; + c_im(inout[iostride]) = tmp467 + tmp474; + c_im(inout[17 * iostride]) = tmp474 - tmp467; + tmp475 = tmp345 - tmp342; + tmp476 = tmp473 - tmp468; + c_im(inout[9 * iostride]) = tmp475 + tmp476; + c_im(inout[25 * iostride]) = tmp476 - tmp475; + } + } + { + fftw_real tmp203; + fftw_real tmp287; + fftw_real tmp290; + fftw_real tmp492; + fftw_real tmp487; + fftw_real tmp493; + fftw_real tmp226; + fftw_real tmp484; + fftw_real tmp254; + fftw_real tmp284; + fftw_real tmp294; + fftw_real tmp300; + fftw_real tmp281; + fftw_real tmp285; + fftw_real tmp297; + fftw_real tmp301; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp202; + fftw_real tmp288; + fftw_real tmp289; + fftw_real tmp485; + fftw_real tmp214; + fftw_real tmp225; + ASSERT_ALIGNED_DOUBLE; + tmp202 = K707106781 * (tmp196 - tmp201); + tmp203 = tmp191 - tmp202; + tmp287 = tmp191 + tmp202; + tmp288 = (K923879532 * tmp208) + (K382683432 * tmp213); + tmp289 = (K382683432 * tmp224) - (K923879532 * tmp219); + tmp290 = tmp288 + tmp289; + tmp492 = tmp289 - tmp288; + 
tmp485 = K707106781 * (tmp305 - tmp304); + tmp487 = tmp485 + tmp486; + tmp493 = tmp486 - tmp485; + tmp214 = (K382683432 * tmp208) - (K923879532 * tmp213); + tmp225 = (K382683432 * tmp219) + (K923879532 * tmp224); + tmp226 = tmp214 - tmp225; + tmp484 = tmp214 + tmp225; + } + { + fftw_real tmp244; + fftw_real tmp253; + fftw_real tmp292; + fftw_real tmp293; + ASSERT_ALIGNED_DOUBLE; + tmp244 = tmp232 - tmp243; + tmp253 = tmp249 - tmp252; + tmp254 = (K980785280 * tmp244) + (K195090322 * tmp253); + tmp284 = (K195090322 * tmp244) - (K980785280 * tmp253); + tmp292 = tmp232 + tmp243; + tmp293 = tmp249 + tmp252; + tmp294 = (K555570233 * tmp292) + (K831469612 * tmp293); + tmp300 = (K831469612 * tmp292) - (K555570233 * tmp293); + } + { + fftw_real tmp271; + fftw_real tmp280; + fftw_real tmp295; + fftw_real tmp296; + ASSERT_ALIGNED_DOUBLE; + tmp271 = tmp259 - tmp270; + tmp280 = tmp276 - tmp279; + tmp281 = (K195090322 * tmp271) - (K980785280 * tmp280); + tmp285 = (K195090322 * tmp280) + (K980785280 * tmp271); + tmp295 = tmp259 + tmp270; + tmp296 = tmp276 + tmp279; + tmp297 = (K831469612 * tmp295) - (K555570233 * tmp296); + tmp301 = (K831469612 * tmp296) + (K555570233 * tmp295); + } + { + fftw_real tmp227; + fftw_real tmp282; + fftw_real tmp283; + fftw_real tmp286; + ASSERT_ALIGNED_DOUBLE; + tmp227 = tmp203 + tmp226; + tmp282 = tmp254 + tmp281; + c_re(inout[23 * iostride]) = tmp227 - tmp282; + c_re(inout[7 * iostride]) = tmp227 + tmp282; + tmp283 = tmp203 - tmp226; + tmp286 = tmp284 - tmp285; + c_re(inout[31 * iostride]) = tmp283 - tmp286; + c_re(inout[15 * iostride]) = tmp283 + tmp286; + } + { + fftw_real tmp491; + fftw_real tmp494; + fftw_real tmp495; + fftw_real tmp496; + ASSERT_ALIGNED_DOUBLE; + tmp491 = tmp284 + tmp285; + tmp494 = tmp492 + tmp493; + c_im(inout[7 * iostride]) = tmp491 + tmp494; + c_im(inout[23 * iostride]) = tmp494 - tmp491; + tmp495 = tmp281 - tmp254; + tmp496 = tmp493 - tmp492; + c_im(inout[15 * iostride]) = tmp495 + tmp496; + c_im(inout[31 * iostride]) = 
tmp496 - tmp495; + } + { + fftw_real tmp291; + fftw_real tmp298; + fftw_real tmp299; + fftw_real tmp302; + ASSERT_ALIGNED_DOUBLE; + tmp291 = tmp287 + tmp290; + tmp298 = tmp294 + tmp297; + c_re(inout[19 * iostride]) = tmp291 - tmp298; + c_re(inout[3 * iostride]) = tmp291 + tmp298; + tmp299 = tmp287 - tmp290; + tmp302 = tmp300 - tmp301; + c_re(inout[27 * iostride]) = tmp299 - tmp302; + c_re(inout[11 * iostride]) = tmp299 + tmp302; + } + { + fftw_real tmp483; + fftw_real tmp488; + fftw_real tmp489; + fftw_real tmp490; + ASSERT_ALIGNED_DOUBLE; + tmp483 = tmp300 + tmp301; + tmp488 = tmp484 + tmp487; + c_im(inout[3 * iostride]) = tmp483 + tmp488; + c_im(inout[19 * iostride]) = tmp488 - tmp483; + tmp489 = tmp297 - tmp294; + tmp490 = tmp487 - tmp484; + c_im(inout[11 * iostride]) = tmp489 + tmp490; + c_im(inout[27 * iostride]) = tmp490 - tmp489; + } + } + } +} + +static const int twiddle_order[] = + { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, +20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 }; +fftw_codelet_desc fftw_twiddle_32_desc = { + "fftw_twiddle_32", + (void (*)()) fftw_twiddle_32, + 32, + FFTW_FORWARD, + FFTW_TWIDDLE, + 704, + 31, + twiddle_order, +}; diff --git a/src/fftw/ftw_4.c b/src/fftw/ftw_4.c new file mode 100644 index 0000000..c04b449 --- /dev/null +++ b/src/fftw/ftw_4.c @@ -0,0 +1,141 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Mon Mar 24 02:07:31 EST 2003 */ + +#include "fftw-int.h" +#include "fftw.h" + +/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -twiddle 4 */ + +/* + * This function contains 22 FP additions, 12 FP multiplications, + * (or, 16 additions, 6 multiplications, 6 fused multiply/add), + * 14 stack variables, and 16 memory accesses + */ + +/* + * Generator Id's : + * $Id: ftw_4.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: ftw_4.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: ftw_4.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + */ + +/* fftw_twiddle_4: in-place 4-point twiddle pass. Runs m iterations (none when m is zero or negative). Each iteration reads the complex elements inout[0], inout[iostride], inout[2 * iostride] and inout[3 * iostride], multiplies points 1..3 by W[0], W[1] and W[2], and stores the combined results back into the same four slots. With x_j the twiddled points, the value stored at inout[k * iostride] is the sum over j of x_j * (-i)^(j * k), i.e. a forward 4-point DFT; the descriptor below also lists 4 and FFTW_FORWARD. After each iteration inout moves on by dist elements and W by 3. Parameters: A - start of the data, overwritten with the results; W - twiddle factors, only read; iostride - element spacing between the four points; m - iteration count; dist - element step from one iteration to the next. No return value. */void fftw_twiddle_4(fftw_complex *A, const fftw_complex *W, int iostride, + int m, int dist) +{ + int i; + fftw_complex *inout; + inout = A; + for (i = m; i > 0; i = i - 1, inout = inout + dist, W = W + 3) { + fftw_real tmp1; + fftw_real tmp25; + fftw_real tmp6; + fftw_real tmp24; + fftw_real tmp12; + fftw_real tmp20; + fftw_real tmp17; + fftw_real tmp21; + ASSERT_ALIGNED_DOUBLE; + /* point 0 is used as loaded: it has no twiddle factor */tmp1 = c_re(inout[0]); + tmp25 = c_im(inout[0]); + { + fftw_real tmp3; + fftw_real tmp5; + fftw_real tmp2; + fftw_real tmp4; + ASSERT_ALIGNED_DOUBLE; + /* (tmp6, tmp24) = real and imaginary part of inout[2 * iostride] times W[1] */tmp3 = c_re(inout[2 * iostride]); + tmp5 = c_im(inout[2 * iostride]); + tmp2 = c_re(W[1]); + tmp4 = c_im(W[1]); + tmp6 = (tmp2 * tmp3) - (tmp4 * tmp5); + tmp24 = (tmp4 * tmp3) + (tmp2 * tmp5); + } + { + fftw_real tmp9; + fftw_real tmp11; + fftw_real tmp8; + fftw_real tmp10; + ASSERT_ALIGNED_DOUBLE; + /* (tmp12, tmp20) = real and imaginary part of inout[iostride] times W[0] */tmp9 = c_re(inout[iostride]); + tmp11 = c_im(inout[iostride]); + tmp8 = c_re(W[0]); + tmp10 = c_im(W[0]); + tmp12 = (tmp8 * tmp9) - (tmp10 * tmp11); + tmp20 = (tmp10 * tmp9) + (tmp8 * tmp11); + } + { + fftw_real tmp14; + fftw_real tmp16; +
+ fftw_real tmp13; + fftw_real tmp15; + ASSERT_ALIGNED_DOUBLE; + /* (tmp17, tmp21) = real and imaginary part of inout[3 * iostride] times W[2] */tmp14 = c_re(inout[3 * iostride]); + tmp16 = c_im(inout[3 * iostride]); + tmp13 = c_re(W[2]); + tmp15 = c_im(W[2]); + tmp17 = (tmp13 * tmp14) - (tmp15 * tmp16); + tmp21 = (tmp15 * tmp14) + (tmp13 * tmp16); + } + { + fftw_real tmp7; + fftw_real tmp18; + fftw_real tmp27; + fftw_real tmp28; + ASSERT_ALIGNED_DOUBLE; + /* real parts of outputs 0 and 2, imaginary parts of outputs 1 and 3 */tmp7 = tmp1 + tmp6; + tmp18 = tmp12 + tmp17; + c_re(inout[2 * iostride]) = tmp7 - tmp18; + c_re(inout[0]) = tmp7 + tmp18; + tmp27 = tmp25 - tmp24; + tmp28 = tmp12 - tmp17; + c_im(inout[iostride]) = tmp27 - tmp28; + c_im(inout[3 * iostride]) = tmp28 + tmp27; + } + { + fftw_real tmp23; + fftw_real tmp26; + fftw_real tmp19; + fftw_real tmp22; + ASSERT_ALIGNED_DOUBLE; + /* imaginary parts of outputs 0 and 2, real parts of outputs 1 and 3 */tmp23 = tmp20 + tmp21; + tmp26 = tmp24 + tmp25; + c_im(inout[0]) = tmp23 + tmp26; + c_im(inout[2 * iostride]) = tmp26 - tmp23; + tmp19 = tmp1 - tmp6; + tmp22 = tmp20 - tmp21; + c_re(inout[3 * iostride]) = tmp19 - tmp22; + c_re(inout[iostride]) = tmp19 + tmp22; + } + } +} + +/* Descriptor for fftw_twiddle_4. Initializers in order: name string, function pointer, 4, FFTW_FORWARD, FFTW_TWIDDLE, 88, 3, twiddle_order. NOTE(review): fftw_codelet_desc is declared outside this chunk; presumably 4 is the transform size and 3 the twiddle count (the loop steps W by 3), and 88 some signature or id value - confirm against fftw-int.h. */static const int twiddle_order[] = { 1, 2, 3 }; +fftw_codelet_desc fftw_twiddle_4_desc = { + "fftw_twiddle_4", + (void (*)()) fftw_twiddle_4, + 4, + FFTW_FORWARD, + FFTW_TWIDDLE, + 88, + 3, + twiddle_order, +}; diff --git a/src/fftw/ftw_5.c b/src/fftw/ftw_5.c new file mode 100644 index 0000000..157b3f7 --- /dev/null +++ b/src/fftw/ftw_5.c @@ -0,0 +1,197 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Mon Mar 24 02:07:32 EST 2003 */ + +#include "fftw-int.h" +#include "fftw.h" + +/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -twiddle 5 */ + +/* + * This function contains 40 FP additions, 28 FP multiplications, + * (or, 26 additions, 14 multiplications, 14 fused multiply/add), + * 26 stack variables, and 20 memory accesses + */ +/* Constants for fftw_twiddle_5: K250000000 is 1/4, K559016994 agrees with sqrt(5)/4, K587785252 with sin(pi/5) and K951056516 with sin(2*pi/5) to the digits given. */static const fftw_real K250000000 = +FFTW_KONST(+0.250000000000000000000000000000000000000000000); +static const fftw_real K559016994 = +FFTW_KONST(+0.559016994374947424102293417182819058860154590); +static const fftw_real K587785252 = +FFTW_KONST(+0.587785252292473129168705954639072768597652438); +static const fftw_real K951056516 = +FFTW_KONST(+0.951056516295153572116439333379382143405698634); + +/* + * Generator Id's : + * $Id: ftw_5.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: ftw_5.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: ftw_5.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + */ + +/* fftw_twiddle_5: in-place 5-point twiddle pass. Runs m iterations (none when m is zero or negative). Each iteration reads the complex elements inout[0] and inout[j * iostride] for j = 1..4, multiplies point j by W[j - 1], and stores the combined results back into the same five slots. With x_j the twiddled points, the value stored at inout[k * iostride] is the sum over j of x_j * exp(-2 * pi * i * j * k / 5), i.e. a forward 5-point DFT; the descriptor below also lists 5 and FFTW_FORWARD. After each iteration inout moves on by dist elements and W by 4. Parameters: A - start of the data, overwritten with the results; W - twiddle factors, only read; iostride - element spacing between the five points; m - iteration count; dist - element step from one iteration to the next. No return value. */void fftw_twiddle_5(fftw_complex *A, const fftw_complex *W, int iostride, + int m, int dist) +{ + int i; + fftw_complex *inout; + inout = A; + for (i = m; i > 0; i = i - 1, inout = inout + dist, W = W + 4) { + fftw_real tmp1; + fftw_real tmp40; + fftw_real tmp30; + fftw_real tmp33; + fftw_real tmp37; + fftw_real tmp38; + fftw_real tmp39; + fftw_real tmp45; + fftw_real tmp44; + fftw_real tmp12; + fftw_real tmp23; + fftw_real tmp24; + ASSERT_ALIGNED_DOUBLE; + /* point 0 is used as loaded: it has no twiddle factor */tmp1 = c_re(inout[0]); + tmp40 = c_im(inout[0]); + { + fftw_real tmp6; + fftw_real tmp28; + fftw_real tmp22; + fftw_real tmp32; + fftw_real tmp11; + fftw_real tmp29; + fftw_real tmp17; + fftw_real tmp31; +
ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp3; + fftw_real tmp5; + fftw_real tmp2; + fftw_real tmp4; + ASSERT_ALIGNED_DOUBLE; + /* (tmp6, tmp28) = real and imaginary part of inout[iostride] times W[0] */tmp3 = c_re(inout[iostride]); + tmp5 = c_im(inout[iostride]); + tmp2 = c_re(W[0]); + tmp4 = c_im(W[0]); + tmp6 = (tmp2 * tmp3) - (tmp4 * tmp5); + tmp28 = (tmp4 * tmp3) + (tmp2 * tmp5); + } + { + fftw_real tmp19; + fftw_real tmp21; + fftw_real tmp18; + fftw_real tmp20; + ASSERT_ALIGNED_DOUBLE; + /* (tmp22, tmp32) = real and imaginary part of inout[3 * iostride] times W[2] */tmp19 = c_re(inout[3 * iostride]); + tmp21 = c_im(inout[3 * iostride]); + tmp18 = c_re(W[2]); + tmp20 = c_im(W[2]); + tmp22 = (tmp18 * tmp19) - (tmp20 * tmp21); + tmp32 = (tmp20 * tmp19) + (tmp18 * tmp21); + } + { + fftw_real tmp8; + fftw_real tmp10; + fftw_real tmp7; + fftw_real tmp9; + ASSERT_ALIGNED_DOUBLE; + /* (tmp11, tmp29) = real and imaginary part of inout[4 * iostride] times W[3] */tmp8 = c_re(inout[4 * iostride]); + tmp10 = c_im(inout[4 * iostride]); + tmp7 = c_re(W[3]); + tmp9 = c_im(W[3]); + tmp11 = (tmp7 * tmp8) - (tmp9 * tmp10); + tmp29 = (tmp9 * tmp8) + (tmp7 * tmp10); + } + { + fftw_real tmp14; + fftw_real tmp16; + fftw_real tmp13; + fftw_real tmp15; + ASSERT_ALIGNED_DOUBLE; + /* (tmp17, tmp31) = real and imaginary part of inout[2 * iostride] times W[1] */tmp14 = c_re(inout[2 * iostride]); + tmp16 = c_im(inout[2 * iostride]); + tmp13 = c_re(W[1]); + tmp15 = c_im(W[1]); + tmp17 = (tmp13 * tmp14) - (tmp15 * tmp16); + tmp31 = (tmp15 * tmp14) + (tmp13 * tmp16); + } + /* differences and sums of points 1 and 4 and of points 2 and 3; tmp24 and tmp39 total the real and imaginary parts of points 1..4 */tmp30 = tmp28 - tmp29; + tmp33 = tmp31 - tmp32; + tmp37 = tmp28 + tmp29; + tmp38 = tmp31 + tmp32; + tmp39 = tmp37 + tmp38; + tmp45 = tmp17 - tmp22; + tmp44 = tmp6 - tmp11; + tmp12 = tmp6 + tmp11; + tmp23 = tmp17 + tmp22; + tmp24 = tmp12 + tmp23; + } + /* output 0: plain sum of all five points (its imaginary part is stored further down) */c_re(inout[0]) = tmp1 + tmp24; + { + fftw_real tmp34; + fftw_real tmp36; + fftw_real tmp27; + fftw_real tmp35; + fftw_real tmp25; + fftw_real tmp26; + ASSERT_ALIGNED_DOUBLE; + /* real parts of outputs 1..4 */tmp34 = (K951056516 * tmp30) + (K587785252 * tmp33); + tmp36 = (K951056516 * tmp33) - (K587785252 * tmp30); + tmp25 = K559016994 * (tmp12 - tmp23); + tmp26 = tmp1 - (K250000000 * tmp24); + tmp27 = tmp25 + tmp26; + tmp35 = tmp26 - tmp25; + c_re(inout[4 * iostride]) = tmp27 - tmp34; + c_re(inout[iostride]) = tmp27 +
tmp34; + c_re(inout[2 * iostride]) = tmp35 - tmp36; + c_re(inout[3 * iostride]) = tmp35 + tmp36; + } + c_im(inout[0]) = tmp39 + tmp40; + { + fftw_real tmp46; + fftw_real tmp47; + fftw_real tmp43; + fftw_real tmp48; + fftw_real tmp41; + fftw_real tmp42; + ASSERT_ALIGNED_DOUBLE; + /* imaginary parts of outputs 1..4 */tmp46 = (K951056516 * tmp44) + (K587785252 * tmp45); + tmp47 = (K951056516 * tmp45) - (K587785252 * tmp44); + tmp41 = K559016994 * (tmp37 - tmp38); + tmp42 = tmp40 - (K250000000 * tmp39); + tmp43 = tmp41 + tmp42; + tmp48 = tmp42 - tmp41; + c_im(inout[iostride]) = tmp43 - tmp46; + c_im(inout[4 * iostride]) = tmp46 + tmp43; + c_im(inout[2 * iostride]) = tmp47 + tmp48; + c_im(inout[3 * iostride]) = tmp48 - tmp47; + } + } +} + +/* Descriptor for fftw_twiddle_5. Initializers in order: name string, function pointer, 5, FFTW_FORWARD, FFTW_TWIDDLE, 110, 4, twiddle_order. NOTE(review): fftw_codelet_desc is declared outside this chunk; presumably 5 is the transform size and 4 the twiddle count (the loop steps W by 4), and 110 some signature or id value - confirm against fftw-int.h. */static const int twiddle_order[] = { 1, 2, 3, 4 }; +fftw_codelet_desc fftw_twiddle_5_desc = { + "fftw_twiddle_5", + (void (*)()) fftw_twiddle_5, + 5, + FFTW_FORWARD, + FFTW_TWIDDLE, + 110, + 4, + twiddle_order, +}; diff --git a/src/fftw/ftw_6.c b/src/fftw/ftw_6.c new file mode 100644 index 0000000..4d86444 --- /dev/null +++ b/src/fftw/ftw_6.c @@ -0,0 +1,220 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Mon Mar 24 02:07:33 EST 2003 */ + +#include "fftw-int.h" +#include "fftw.h" + +/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -twiddle 6 */ + +/* + * This function contains 46 FP additions, 28 FP multiplications, + * (or, 32 additions, 14 multiplications, 14 fused multiply/add), + * 22 stack variables, and 24 memory accesses + */ +/* Constants for fftw_twiddle_6: K500000000 is 1/2 and K866025403 agrees with sqrt(3)/2 to the digits given. */static const fftw_real K500000000 = +FFTW_KONST(+0.500000000000000000000000000000000000000000000); +static const fftw_real K866025403 = +FFTW_KONST(+0.866025403784438646763723170752936183471402627); + +/* + * Generator Id's : + * $Id: ftw_6.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: ftw_6.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: ftw_6.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + */ + +/* fftw_twiddle_6: in-place 6-point twiddle pass. Runs m iterations (none when m is zero or negative). Each iteration reads the complex elements inout[0] and inout[j * iostride] for j = 1..5, multiplies point j by W[j - 1], and stores the combined results back into the same six slots. With x_j the twiddled points, the value stored at inout[k * iostride] is the sum over j of x_j * exp(-2 * pi * i * j * k / 6), i.e. a forward 6-point DFT; the descriptor below also lists 6 and FFTW_FORWARD. After each iteration inout moves on by dist elements and W by 5. Parameters: A - start of the data, overwritten with the results; W - twiddle factors, only read; iostride - element spacing between the six points; m - iteration count; dist - element step from one iteration to the next. No return value. */void fftw_twiddle_6(fftw_complex *A, const fftw_complex *W, int iostride, + int m, int dist) +{ + int i; + fftw_complex *inout; + inout = A; + for (i = m; i > 0; i = i - 1, inout = inout + dist, W = W + 5) { + fftw_real tmp7; + fftw_real tmp31; + fftw_real tmp50; + fftw_real tmp54; + fftw_real tmp29; + fftw_real tmp33; + fftw_real tmp41; + fftw_real tmp45; + fftw_real tmp18; + fftw_real tmp32; + fftw_real tmp38; + fftw_real tmp44; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp1; + fftw_real tmp49; + fftw_real tmp6; + fftw_real tmp48; + ASSERT_ALIGNED_DOUBLE; + /* point 0 (no twiddle factor) is paired with point 3 times W[2]; tmp7 and tmp54 receive their difference, tmp31 and tmp50 their sum */tmp1 = c_re(inout[0]); + tmp49 = c_im(inout[0]); + { + fftw_real tmp3; + fftw_real tmp5; + fftw_real tmp2; + fftw_real tmp4; + ASSERT_ALIGNED_DOUBLE; + tmp3 = c_re(inout[3 * iostride]); + tmp5 = c_im(inout[3 * iostride]); + tmp2 = c_re(W[2]); + tmp4 = c_im(W[2]); + tmp6 = (tmp2 * tmp3) - (tmp4 * tmp5); + tmp48
= (tmp4 * tmp3) + (tmp2 * tmp5); + } + tmp7 = tmp1 - tmp6; + tmp31 = tmp1 + tmp6; + tmp50 = tmp48 + tmp49; + tmp54 = tmp49 - tmp48; + } + { + fftw_real tmp23; + fftw_real tmp39; + fftw_real tmp28; + fftw_real tmp40; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp20; + fftw_real tmp22; + fftw_real tmp19; + fftw_real tmp21; + ASSERT_ALIGNED_DOUBLE; + /* (tmp23, tmp39) = real and imaginary part of inout[4 * iostride] times W[3] */tmp20 = c_re(inout[4 * iostride]); + tmp22 = c_im(inout[4 * iostride]); + tmp19 = c_re(W[3]); + tmp21 = c_im(W[3]); + tmp23 = (tmp19 * tmp20) - (tmp21 * tmp22); + tmp39 = (tmp21 * tmp20) + (tmp19 * tmp22); + } + { + fftw_real tmp25; + fftw_real tmp27; + fftw_real tmp24; + fftw_real tmp26; + ASSERT_ALIGNED_DOUBLE; + /* (tmp28, tmp40) = real and imaginary part of inout[iostride] times W[0] */tmp25 = c_re(inout[iostride]); + tmp27 = c_im(inout[iostride]); + tmp24 = c_re(W[0]); + tmp26 = c_im(W[0]); + tmp28 = (tmp24 * tmp25) - (tmp26 * tmp27); + tmp40 = (tmp26 * tmp25) + (tmp24 * tmp27); + } + tmp29 = tmp23 - tmp28; + tmp33 = tmp23 + tmp28; + tmp41 = tmp39 - tmp40; + tmp45 = tmp39 + tmp40; + } + { + fftw_real tmp12; + fftw_real tmp36; + fftw_real tmp17; + fftw_real tmp37; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp9; + fftw_real tmp11; + fftw_real tmp8; + fftw_real tmp10; + ASSERT_ALIGNED_DOUBLE; + /* (tmp12, tmp36) = real and imaginary part of inout[2 * iostride] times W[1] */tmp9 = c_re(inout[2 * iostride]); + tmp11 = c_im(inout[2 * iostride]); + tmp8 = c_re(W[1]); + tmp10 = c_im(W[1]); + tmp12 = (tmp8 * tmp9) - (tmp10 * tmp11); + tmp36 = (tmp10 * tmp9) + (tmp8 * tmp11); + } + { + fftw_real tmp14; + fftw_real tmp16; + fftw_real tmp13; + fftw_real tmp15; + ASSERT_ALIGNED_DOUBLE; + /* (tmp17, tmp37) = real and imaginary part of inout[5 * iostride] times W[4] */tmp14 = c_re(inout[5 * iostride]); + tmp16 = c_im(inout[5 * iostride]); + tmp13 = c_re(W[4]); + tmp15 = c_im(W[4]); + tmp17 = (tmp13 * tmp14) - (tmp15 * tmp16); + tmp37 = (tmp15 * tmp14) + (tmp13 * tmp16); + } + tmp18 = tmp12 - tmp17; + tmp32 = tmp12 + tmp17; + tmp38 = tmp36 - tmp37; + tmp44 = tmp36 + tmp37; + } + { + fftw_real tmp42; + fftw_real tmp30; + fftw_real tmp35; + fftw_real tmp53; + fftw_real tmp55; + fftw_real tmp56; + ASSERT_ALIGNED_DOUBLE; + /* outputs 1, 3 and 5, computed from the three pairwise differences */tmp42 = K866025403 * (tmp38 - tmp41); + tmp30 =
tmp18 + tmp29; + tmp35 = tmp7 - (K500000000 * tmp30); + c_re(inout[3 * iostride]) = tmp7 + tmp30; + c_re(inout[iostride]) = tmp35 + tmp42; + c_re(inout[5 * iostride]) = tmp35 - tmp42; + tmp53 = K866025403 * (tmp29 - tmp18); + tmp55 = tmp38 + tmp41; + tmp56 = tmp54 - (K500000000 * tmp55); + c_im(inout[iostride]) = tmp53 + tmp56; + c_im(inout[5 * iostride]) = tmp56 - tmp53; + c_im(inout[3 * iostride]) = tmp55 + tmp54; + } + { + fftw_real tmp46; + fftw_real tmp34; + fftw_real tmp43; + fftw_real tmp52; + fftw_real tmp47; + fftw_real tmp51; + ASSERT_ALIGNED_DOUBLE; + /* outputs 0, 2 and 4, computed from the three pairwise sums */tmp46 = K866025403 * (tmp44 - tmp45); + tmp34 = tmp32 + tmp33; + tmp43 = tmp31 - (K500000000 * tmp34); + c_re(inout[0]) = tmp31 + tmp34; + c_re(inout[4 * iostride]) = tmp43 + tmp46; + c_re(inout[2 * iostride]) = tmp43 - tmp46; + tmp52 = K866025403 * (tmp33 - tmp32); + tmp47 = tmp44 + tmp45; + tmp51 = tmp50 - (K500000000 * tmp47); + c_im(inout[0]) = tmp47 + tmp50; + c_im(inout[4 * iostride]) = tmp52 + tmp51; + c_im(inout[2 * iostride]) = tmp51 - tmp52; + } + } +} + +/* Descriptor for fftw_twiddle_6. Initializers in order: name string, function pointer, 6, FFTW_FORWARD, FFTW_TWIDDLE, 132, 5, twiddle_order. NOTE(review): fftw_codelet_desc is declared outside this chunk; presumably 6 is the transform size and 5 the twiddle count (the loop steps W by 5), and 132 some signature or id value - confirm against fftw-int.h. */static const int twiddle_order[] = { 1, 2, 3, 4, 5 }; +fftw_codelet_desc fftw_twiddle_6_desc = { + "fftw_twiddle_6", + (void (*)()) fftw_twiddle_6, + 6, + FFTW_FORWARD, + FFTW_TWIDDLE, + 132, + 5, + twiddle_order, +}; diff --git a/src/fftw/ftw_64.c b/src/fftw/ftw_64.c new file mode 100644 index 0000000..99d6e26 --- /dev/null +++ b/src/fftw/ftw_64.c @@ -0,0 +1,3203 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Mon Mar 24 02:07:48 EST 2003 */ + +#include "fftw-int.h" +#include "fftw.h" + +/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -twiddle 64 */ + +/* + * This function contains 1038 FP additions, 500 FP multiplications, + * (or, 808 additions, 270 multiplications, 230 fused multiply/add), + * 162 stack variables, and 256 memory accesses + */ +static const fftw_real K290284677 = +FFTW_KONST(+0.290284677254462367636192375817395274691476278); +static const fftw_real K956940335 = +FFTW_KONST(+0.956940335732208864935797886980269969482849206); +static const fftw_real K881921264 = +FFTW_KONST(+0.881921264348355029712756863660388349508442621); +static const fftw_real K471396736 = +FFTW_KONST(+0.471396736825997648556387625905254377657460319); +static const fftw_real K555570233 = +FFTW_KONST(+0.555570233019602224742830813948532874374937191); +static const fftw_real K831469612 = +FFTW_KONST(+0.831469612302545237078788377617905756738560812); +static const fftw_real K773010453 = +FFTW_KONST(+0.773010453362736960810906609758469800971041293); +static const fftw_real K634393284 = +FFTW_KONST(+0.634393284163645498215171613225493370675687095); +static const fftw_real K098017140 = +FFTW_KONST(+0.098017140329560601994195563888641845861136673); +static const fftw_real K995184726 = +FFTW_KONST(+0.995184726672196886244836953109479921575474869); +static const fftw_real K980785280 = +FFTW_KONST(+0.980785280403230449126182236134239036973933731); +static const fftw_real K195090322 = +FFTW_KONST(+0.195090322016128267848284868477022240927691618); +static const fftw_real K707106781 = 
+FFTW_KONST(+0.707106781186547524400844362104849039284835938); +static const fftw_real K923879532 = +FFTW_KONST(+0.923879532511286756128183189396788286822416626); +static const fftw_real K382683432 = +FFTW_KONST(+0.382683432365089771728459984030398866761344562); + +/* + * Generator Id's : + * $Id: ftw_64.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: ftw_64.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: ftw_64.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + */ + +void fftw_twiddle_64(fftw_complex *A, const fftw_complex *W, int iostride, + int m, int dist) +{ + int i; + fftw_complex *inout; + inout = A; + for (i = m; i > 0; i = i - 1, inout = inout + dist, W = W + 63) { + fftw_real tmp19; + fftw_real tmp791; + fftw_real tmp1109; + fftw_real tmp1139; + fftw_real tmp1047; + fftw_real tmp1077; + fftw_real tmp383; + fftw_real tmp655; + fftw_real tmp66; + fftw_real tmp800; + fftw_real tmp908; + fftw_real tmp956; + fftw_real tmp406; + fftw_real tmp608; + fftw_real tmp662; + fftw_real tmp744; + fftw_real tmp42; + fftw_real tmp1076; + fftw_real tmp794; + fftw_real tmp1042; + fftw_real tmp394; + fftw_real tmp1106; + fftw_real tmp658; + fftw_real tmp1138; + fftw_real tmp329; + fftw_real tmp983; + fftw_real tmp863; + fftw_real tmp927; + fftw_real tmp990; + fftw_real tmp1026; + fftw_real tmp880; + fftw_real tmp930; + fftw_real tmp535; + fftw_real tmp703; + fftw_real tmp576; + fftw_real tmp714; + fftw_real tmp579; + fftw_real tmp704; + fftw_real tmp546; + fftw_real tmp715; + fftw_real tmp376; + fftw_real tmp991; + fftw_real tmp868; + fftw_real tmp882; + fftw_real tmp986; + fftw_real tmp1027; + fftw_real tmp873; + fftw_real tmp881; + fftw_real tmp558; + fftw_real tmp582; + fftw_real tmp708; + fftw_real tmp718; + fftw_real tmp569; + fftw_real tmp581; + fftw_real tmp711; + fftw_real tmp717; + fftw_real tmp89; + fftw_real tmp805; + fftw_real tmp909; + fftw_real tmp957; + fftw_real tmp417; + fftw_real tmp609; + fftw_real tmp665; + fftw_real tmp745; + fftw_real tmp161; + fftw_real 
tmp184; + fftw_real tmp965; + fftw_real tmp823; + fftw_real tmp915; + fftw_real tmp966; + fftw_real tmp967; + fftw_real tmp968; + fftw_real tmp828; + fftw_real tmp916; + fftw_real tmp451; + fftw_real tmp678; + fftw_real tmp468; + fftw_real tmp675; + fftw_real tmp471; + fftw_real tmp679; + fftw_real tmp462; + fftw_real tmp676; + fftw_real tmp114; + fftw_real tmp137; + fftw_real tmp963; + fftw_real tmp812; + fftw_real tmp912; + fftw_real tmp960; + fftw_real tmp961; + fftw_real tmp962; + fftw_real tmp817; + fftw_real tmp913; + fftw_real tmp424; + fftw_real tmp668; + fftw_real tmp441; + fftw_real tmp671; + fftw_real tmp444; + fftw_real tmp669; + fftw_real tmp435; + fftw_real tmp672; + fftw_real tmp234; + fftw_real tmp977; + fftw_real tmp836; + fftw_real tmp923; + fftw_real tmp974; + fftw_real tmp1021; + fftw_real tmp853; + fftw_real tmp920; + fftw_real tmp480; + fftw_real tmp684; + fftw_real tmp521; + fftw_real tmp695; + fftw_real tmp524; + fftw_real tmp685; + fftw_real tmp491; + fftw_real tmp696; + fftw_real tmp281; + fftw_real tmp975; + fftw_real tmp841; + fftw_real tmp855; + fftw_real tmp980; + fftw_real tmp1022; + fftw_real tmp846; + fftw_real tmp854; + fftw_real tmp503; + fftw_real tmp527; + fftw_real tmp689; + fftw_real tmp699; + fftw_real tmp514; + fftw_real tmp526; + fftw_real tmp692; + fftw_real tmp698; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp1; + fftw_real tmp1045; + fftw_real tmp6; + fftw_real tmp1044; + fftw_real tmp12; + fftw_real tmp380; + fftw_real tmp17; + fftw_real tmp381; + ASSERT_ALIGNED_DOUBLE; + tmp1 = c_re(inout[0]); + tmp1045 = c_im(inout[0]); + { + fftw_real tmp3; + fftw_real tmp5; + fftw_real tmp2; + fftw_real tmp4; + ASSERT_ALIGNED_DOUBLE; + tmp3 = c_re(inout[32 * iostride]); + tmp5 = c_im(inout[32 * iostride]); + tmp2 = c_re(W[31]); + tmp4 = c_im(W[31]); + tmp6 = (tmp2 * tmp3) - (tmp4 * tmp5); + tmp1044 = (tmp4 * tmp3) + (tmp2 * tmp5); + } + { + fftw_real tmp9; + fftw_real tmp11; + fftw_real tmp8; + fftw_real tmp10; + 
ASSERT_ALIGNED_DOUBLE; + tmp9 = c_re(inout[16 * iostride]); + tmp11 = c_im(inout[16 * iostride]); + tmp8 = c_re(W[15]); + tmp10 = c_im(W[15]); + tmp12 = (tmp8 * tmp9) - (tmp10 * tmp11); + tmp380 = (tmp10 * tmp9) + (tmp8 * tmp11); + } + { + fftw_real tmp14; + fftw_real tmp16; + fftw_real tmp13; + fftw_real tmp15; + ASSERT_ALIGNED_DOUBLE; + tmp14 = c_re(inout[48 * iostride]); + tmp16 = c_im(inout[48 * iostride]); + tmp13 = c_re(W[47]); + tmp15 = c_im(W[47]); + tmp17 = (tmp13 * tmp14) - (tmp15 * tmp16); + tmp381 = (tmp15 * tmp14) + (tmp13 * tmp16); + } + { + fftw_real tmp7; + fftw_real tmp18; + fftw_real tmp1107; + fftw_real tmp1108; + ASSERT_ALIGNED_DOUBLE; + tmp7 = tmp1 + tmp6; + tmp18 = tmp12 + tmp17; + tmp19 = tmp7 + tmp18; + tmp791 = tmp7 - tmp18; + tmp1107 = tmp1045 - tmp1044; + tmp1108 = tmp12 - tmp17; + tmp1109 = tmp1107 - tmp1108; + tmp1139 = tmp1108 + tmp1107; + } + { + fftw_real tmp1043; + fftw_real tmp1046; + fftw_real tmp379; + fftw_real tmp382; + ASSERT_ALIGNED_DOUBLE; + tmp1043 = tmp380 + tmp381; + tmp1046 = tmp1044 + tmp1045; + tmp1047 = tmp1043 + tmp1046; + tmp1077 = tmp1046 - tmp1043; + tmp379 = tmp1 - tmp6; + tmp382 = tmp380 - tmp381; + tmp383 = tmp379 - tmp382; + tmp655 = tmp379 + tmp382; + } + } + { + fftw_real tmp54; + fftw_real tmp401; + fftw_real tmp398; + fftw_real tmp796; + fftw_real tmp65; + fftw_real tmp399; + fftw_real tmp404; + fftw_real tmp797; + fftw_real tmp798; + fftw_real tmp799; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp48; + fftw_real tmp396; + fftw_real tmp53; + fftw_real tmp397; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp45; + fftw_real tmp47; + fftw_real tmp44; + fftw_real tmp46; + ASSERT_ALIGNED_DOUBLE; + tmp45 = c_re(inout[4 * iostride]); + tmp47 = c_im(inout[4 * iostride]); + tmp44 = c_re(W[3]); + tmp46 = c_im(W[3]); + tmp48 = (tmp44 * tmp45) - (tmp46 * tmp47); + tmp396 = (tmp46 * tmp45) + (tmp44 * tmp47); + } + { + fftw_real tmp50; + fftw_real tmp52; + fftw_real tmp49; + fftw_real tmp51; + ASSERT_ALIGNED_DOUBLE; + tmp50 = 
c_re(inout[36 * iostride]); + tmp52 = c_im(inout[36 * iostride]); + tmp49 = c_re(W[35]); + tmp51 = c_im(W[35]); + tmp53 = (tmp49 * tmp50) - (tmp51 * tmp52); + tmp397 = (tmp51 * tmp50) + (tmp49 * tmp52); + } + tmp54 = tmp48 + tmp53; + tmp401 = tmp48 - tmp53; + tmp398 = tmp396 - tmp397; + tmp796 = tmp396 + tmp397; + } + { + fftw_real tmp59; + fftw_real tmp402; + fftw_real tmp64; + fftw_real tmp403; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp56; + fftw_real tmp58; + fftw_real tmp55; + fftw_real tmp57; + ASSERT_ALIGNED_DOUBLE; + tmp56 = c_re(inout[20 * iostride]); + tmp58 = c_im(inout[20 * iostride]); + tmp55 = c_re(W[19]); + tmp57 = c_im(W[19]); + tmp59 = (tmp55 * tmp56) - (tmp57 * tmp58); + tmp402 = (tmp57 * tmp56) + (tmp55 * tmp58); + } + { + fftw_real tmp61; + fftw_real tmp63; + fftw_real tmp60; + fftw_real tmp62; + ASSERT_ALIGNED_DOUBLE; + tmp61 = c_re(inout[52 * iostride]); + tmp63 = c_im(inout[52 * iostride]); + tmp60 = c_re(W[51]); + tmp62 = c_im(W[51]); + tmp64 = (tmp60 * tmp61) - (tmp62 * tmp63); + tmp403 = (tmp62 * tmp61) + (tmp60 * tmp63); + } + tmp65 = tmp59 + tmp64; + tmp399 = tmp59 - tmp64; + tmp404 = tmp402 - tmp403; + tmp797 = tmp402 + tmp403; + } + tmp66 = tmp54 + tmp65; + tmp798 = tmp796 - tmp797; + tmp799 = tmp54 - tmp65; + tmp800 = tmp798 - tmp799; + tmp908 = tmp799 + tmp798; + tmp956 = tmp796 + tmp797; + { + fftw_real tmp400; + fftw_real tmp405; + fftw_real tmp660; + fftw_real tmp661; + ASSERT_ALIGNED_DOUBLE; + tmp400 = tmp398 + tmp399; + tmp405 = tmp401 - tmp404; + tmp406 = (K382683432 * tmp400) - (K923879532 * tmp405); + tmp608 = (K923879532 * tmp400) + (K382683432 * tmp405); + tmp660 = tmp398 - tmp399; + tmp661 = tmp401 + tmp404; + tmp662 = (K923879532 * tmp660) - (K382683432 * tmp661); + tmp744 = (K382683432 * tmp660) + (K923879532 * tmp661); + } + } + { + fftw_real tmp24; + fftw_real tmp384; + fftw_real tmp29; + fftw_real tmp385; + fftw_real tmp386; + fftw_real tmp387; + fftw_real tmp35; + fftw_real tmp390; + fftw_real tmp40; + fftw_real 
tmp391; + fftw_real tmp389; + fftw_real tmp392; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp21; + fftw_real tmp23; + fftw_real tmp20; + fftw_real tmp22; + ASSERT_ALIGNED_DOUBLE; + tmp21 = c_re(inout[8 * iostride]); + tmp23 = c_im(inout[8 * iostride]); + tmp20 = c_re(W[7]); + tmp22 = c_im(W[7]); + tmp24 = (tmp20 * tmp21) - (tmp22 * tmp23); + tmp384 = (tmp22 * tmp21) + (tmp20 * tmp23); + } + { + fftw_real tmp26; + fftw_real tmp28; + fftw_real tmp25; + fftw_real tmp27; + ASSERT_ALIGNED_DOUBLE; + tmp26 = c_re(inout[40 * iostride]); + tmp28 = c_im(inout[40 * iostride]); + tmp25 = c_re(W[39]); + tmp27 = c_im(W[39]); + tmp29 = (tmp25 * tmp26) - (tmp27 * tmp28); + tmp385 = (tmp27 * tmp26) + (tmp25 * tmp28); + } + tmp386 = tmp384 - tmp385; + tmp387 = tmp24 - tmp29; + { + fftw_real tmp32; + fftw_real tmp34; + fftw_real tmp31; + fftw_real tmp33; + ASSERT_ALIGNED_DOUBLE; + tmp32 = c_re(inout[56 * iostride]); + tmp34 = c_im(inout[56 * iostride]); + tmp31 = c_re(W[55]); + tmp33 = c_im(W[55]); + tmp35 = (tmp31 * tmp32) - (tmp33 * tmp34); + tmp390 = (tmp33 * tmp32) + (tmp31 * tmp34); + } + { + fftw_real tmp37; + fftw_real tmp39; + fftw_real tmp36; + fftw_real tmp38; + ASSERT_ALIGNED_DOUBLE; + tmp37 = c_re(inout[24 * iostride]); + tmp39 = c_im(inout[24 * iostride]); + tmp36 = c_re(W[23]); + tmp38 = c_im(W[23]); + tmp40 = (tmp36 * tmp37) - (tmp38 * tmp39); + tmp391 = (tmp38 * tmp37) + (tmp36 * tmp39); + } + tmp389 = tmp35 - tmp40; + tmp392 = tmp390 - tmp391; + { + fftw_real tmp30; + fftw_real tmp41; + fftw_real tmp792; + fftw_real tmp793; + ASSERT_ALIGNED_DOUBLE; + tmp30 = tmp24 + tmp29; + tmp41 = tmp35 + tmp40; + tmp42 = tmp30 + tmp41; + tmp1076 = tmp41 - tmp30; + tmp792 = tmp384 + tmp385; + tmp793 = tmp390 + tmp391; + tmp794 = tmp792 - tmp793; + tmp1042 = tmp792 + tmp793; + } + { + fftw_real tmp388; + fftw_real tmp393; + fftw_real tmp656; + fftw_real tmp657; + ASSERT_ALIGNED_DOUBLE; + tmp388 = tmp386 - tmp387; + tmp393 = tmp389 + tmp392; + tmp394 = K707106781 * (tmp388 - tmp393); 
+ tmp1106 = K707106781 * (tmp388 + tmp393); + tmp656 = tmp387 + tmp386; + tmp657 = tmp389 - tmp392; + tmp658 = K707106781 * (tmp656 + tmp657); + tmp1138 = K707106781 * (tmp657 - tmp656); + } + } + { + fftw_real tmp287; + fftw_real tmp572; + fftw_real tmp292; + fftw_real tmp573; + fftw_real tmp293; + fftw_real tmp876; + fftw_real tmp327; + fftw_real tmp541; + fftw_real tmp544; + fftw_real tmp861; + fftw_real tmp298; + fftw_real tmp532; + fftw_real tmp303; + fftw_real tmp533; + fftw_real tmp304; + fftw_real tmp877; + fftw_real tmp316; + fftw_real tmp539; + fftw_real tmp538; + fftw_real tmp860; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp284; + fftw_real tmp286; + fftw_real tmp283; + fftw_real tmp285; + ASSERT_ALIGNED_DOUBLE; + tmp284 = c_re(inout[63 * iostride]); + tmp286 = c_im(inout[63 * iostride]); + tmp283 = c_re(W[62]); + tmp285 = c_im(W[62]); + tmp287 = (tmp283 * tmp284) - (tmp285 * tmp286); + tmp572 = (tmp285 * tmp284) + (tmp283 * tmp286); + } + { + fftw_real tmp289; + fftw_real tmp291; + fftw_real tmp288; + fftw_real tmp290; + ASSERT_ALIGNED_DOUBLE; + tmp289 = c_re(inout[31 * iostride]); + tmp291 = c_im(inout[31 * iostride]); + tmp288 = c_re(W[30]); + tmp290 = c_im(W[30]); + tmp292 = (tmp288 * tmp289) - (tmp290 * tmp291); + tmp573 = (tmp290 * tmp289) + (tmp288 * tmp291); + } + tmp293 = tmp287 + tmp292; + tmp876 = tmp572 + tmp573; + { + fftw_real tmp321; + fftw_real tmp542; + fftw_real tmp326; + fftw_real tmp543; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp318; + fftw_real tmp320; + fftw_real tmp317; + fftw_real tmp319; + ASSERT_ALIGNED_DOUBLE; + tmp318 = c_re(inout[55 * iostride]); + tmp320 = c_im(inout[55 * iostride]); + tmp317 = c_re(W[54]); + tmp319 = c_im(W[54]); + tmp321 = (tmp317 * tmp318) - (tmp319 * tmp320); + tmp542 = (tmp319 * tmp318) + (tmp317 * tmp320); + } + { + fftw_real tmp323; + fftw_real tmp325; + fftw_real tmp322; + fftw_real tmp324; + ASSERT_ALIGNED_DOUBLE; + tmp323 = c_re(inout[23 * iostride]); + tmp325 = c_im(inout[23 * iostride]); + 
tmp322 = c_re(W[22]); + tmp324 = c_im(W[22]); + tmp326 = (tmp322 * tmp323) - (tmp324 * tmp325); + tmp543 = (tmp324 * tmp323) + (tmp322 * tmp325); + } + tmp327 = tmp321 + tmp326; + tmp541 = tmp321 - tmp326; + tmp544 = tmp542 - tmp543; + tmp861 = tmp542 + tmp543; + } + { + fftw_real tmp295; + fftw_real tmp297; + fftw_real tmp294; + fftw_real tmp296; + ASSERT_ALIGNED_DOUBLE; + tmp295 = c_re(inout[15 * iostride]); + tmp297 = c_im(inout[15 * iostride]); + tmp294 = c_re(W[14]); + tmp296 = c_im(W[14]); + tmp298 = (tmp294 * tmp295) - (tmp296 * tmp297); + tmp532 = (tmp296 * tmp295) + (tmp294 * tmp297); + } + { + fftw_real tmp300; + fftw_real tmp302; + fftw_real tmp299; + fftw_real tmp301; + ASSERT_ALIGNED_DOUBLE; + tmp300 = c_re(inout[47 * iostride]); + tmp302 = c_im(inout[47 * iostride]); + tmp299 = c_re(W[46]); + tmp301 = c_im(W[46]); + tmp303 = (tmp299 * tmp300) - (tmp301 * tmp302); + tmp533 = (tmp301 * tmp300) + (tmp299 * tmp302); + } + tmp304 = tmp298 + tmp303; + tmp877 = tmp532 + tmp533; + { + fftw_real tmp310; + fftw_real tmp536; + fftw_real tmp315; + fftw_real tmp537; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp307; + fftw_real tmp309; + fftw_real tmp306; + fftw_real tmp308; + ASSERT_ALIGNED_DOUBLE; + tmp307 = c_re(inout[7 * iostride]); + tmp309 = c_im(inout[7 * iostride]); + tmp306 = c_re(W[6]); + tmp308 = c_im(W[6]); + tmp310 = (tmp306 * tmp307) - (tmp308 * tmp309); + tmp536 = (tmp308 * tmp307) + (tmp306 * tmp309); + } + { + fftw_real tmp312; + fftw_real tmp314; + fftw_real tmp311; + fftw_real tmp313; + ASSERT_ALIGNED_DOUBLE; + tmp312 = c_re(inout[39 * iostride]); + tmp314 = c_im(inout[39 * iostride]); + tmp311 = c_re(W[38]); + tmp313 = c_im(W[38]); + tmp315 = (tmp311 * tmp312) - (tmp313 * tmp314); + tmp537 = (tmp313 * tmp312) + (tmp311 * tmp314); + } + tmp316 = tmp310 + tmp315; + tmp539 = tmp310 - tmp315; + tmp538 = tmp536 - tmp537; + tmp860 = tmp536 + tmp537; + } + { + fftw_real tmp305; + fftw_real tmp328; + fftw_real tmp859; + fftw_real tmp862; + 
ASSERT_ALIGNED_DOUBLE; + tmp305 = tmp293 + tmp304; + tmp328 = tmp316 + tmp327; + tmp329 = tmp305 + tmp328; + tmp983 = tmp305 - tmp328; + tmp859 = tmp293 - tmp304; + tmp862 = tmp860 - tmp861; + tmp863 = tmp859 - tmp862; + tmp927 = tmp859 + tmp862; + } + { + fftw_real tmp988; + fftw_real tmp989; + fftw_real tmp878; + fftw_real tmp879; + ASSERT_ALIGNED_DOUBLE; + tmp988 = tmp876 + tmp877; + tmp989 = tmp860 + tmp861; + tmp990 = tmp988 - tmp989; + tmp1026 = tmp988 + tmp989; + tmp878 = tmp876 - tmp877; + tmp879 = tmp327 - tmp316; + tmp880 = tmp878 - tmp879; + tmp930 = tmp878 + tmp879; + } + { + fftw_real tmp531; + fftw_real tmp534; + fftw_real tmp574; + fftw_real tmp575; + ASSERT_ALIGNED_DOUBLE; + tmp531 = tmp287 - tmp292; + tmp534 = tmp532 - tmp533; + tmp535 = tmp531 - tmp534; + tmp703 = tmp531 + tmp534; + tmp574 = tmp572 - tmp573; + tmp575 = tmp298 - tmp303; + tmp576 = tmp574 + tmp575; + tmp714 = tmp574 - tmp575; + } + { + fftw_real tmp577; + fftw_real tmp578; + fftw_real tmp540; + fftw_real tmp545; + ASSERT_ALIGNED_DOUBLE; + tmp577 = tmp541 - tmp544; + tmp578 = tmp539 + tmp538; + tmp579 = K707106781 * (tmp577 - tmp578); + tmp704 = K707106781 * (tmp578 + tmp577); + tmp540 = tmp538 - tmp539; + tmp545 = tmp541 + tmp544; + tmp546 = K707106781 * (tmp540 - tmp545); + tmp715 = K707106781 * (tmp540 + tmp545); + } + } + { + fftw_real tmp340; + fftw_real tmp553; + fftw_real tmp550; + fftw_real tmp864; + fftw_real tmp374; + fftw_real tmp562; + fftw_real tmp567; + fftw_real tmp871; + fftw_real tmp351; + fftw_real tmp551; + fftw_real tmp556; + fftw_real tmp865; + fftw_real tmp363; + fftw_real tmp564; + fftw_real tmp561; + fftw_real tmp870; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp334; + fftw_real tmp548; + fftw_real tmp339; + fftw_real tmp549; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp331; + fftw_real tmp333; + fftw_real tmp330; + fftw_real tmp332; + ASSERT_ALIGNED_DOUBLE; + tmp331 = c_re(inout[3 * iostride]); + tmp333 = c_im(inout[3 * iostride]); + tmp330 = c_re(W[2]); + 
tmp332 = c_im(W[2]); + tmp334 = (tmp330 * tmp331) - (tmp332 * tmp333); + tmp548 = (tmp332 * tmp331) + (tmp330 * tmp333); + } + { + fftw_real tmp336; + fftw_real tmp338; + fftw_real tmp335; + fftw_real tmp337; + ASSERT_ALIGNED_DOUBLE; + tmp336 = c_re(inout[35 * iostride]); + tmp338 = c_im(inout[35 * iostride]); + tmp335 = c_re(W[34]); + tmp337 = c_im(W[34]); + tmp339 = (tmp335 * tmp336) - (tmp337 * tmp338); + tmp549 = (tmp337 * tmp336) + (tmp335 * tmp338); + } + tmp340 = tmp334 + tmp339; + tmp553 = tmp334 - tmp339; + tmp550 = tmp548 - tmp549; + tmp864 = tmp548 + tmp549; + } + { + fftw_real tmp368; + fftw_real tmp565; + fftw_real tmp373; + fftw_real tmp566; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp365; + fftw_real tmp367; + fftw_real tmp364; + fftw_real tmp366; + ASSERT_ALIGNED_DOUBLE; + tmp365 = c_re(inout[11 * iostride]); + tmp367 = c_im(inout[11 * iostride]); + tmp364 = c_re(W[10]); + tmp366 = c_im(W[10]); + tmp368 = (tmp364 * tmp365) - (tmp366 * tmp367); + tmp565 = (tmp366 * tmp365) + (tmp364 * tmp367); + } + { + fftw_real tmp370; + fftw_real tmp372; + fftw_real tmp369; + fftw_real tmp371; + ASSERT_ALIGNED_DOUBLE; + tmp370 = c_re(inout[43 * iostride]); + tmp372 = c_im(inout[43 * iostride]); + tmp369 = c_re(W[42]); + tmp371 = c_im(W[42]); + tmp373 = (tmp369 * tmp370) - (tmp371 * tmp372); + tmp566 = (tmp371 * tmp370) + (tmp369 * tmp372); + } + tmp374 = tmp368 + tmp373; + tmp562 = tmp368 - tmp373; + tmp567 = tmp565 - tmp566; + tmp871 = tmp565 + tmp566; + } + { + fftw_real tmp345; + fftw_real tmp554; + fftw_real tmp350; + fftw_real tmp555; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp342; + fftw_real tmp344; + fftw_real tmp341; + fftw_real tmp343; + ASSERT_ALIGNED_DOUBLE; + tmp342 = c_re(inout[19 * iostride]); + tmp344 = c_im(inout[19 * iostride]); + tmp341 = c_re(W[18]); + tmp343 = c_im(W[18]); + tmp345 = (tmp341 * tmp342) - (tmp343 * tmp344); + tmp554 = (tmp343 * tmp342) + (tmp341 * tmp344); + } + { + fftw_real tmp347; + fftw_real tmp349; + fftw_real tmp346; + 
fftw_real tmp348; + ASSERT_ALIGNED_DOUBLE; + tmp347 = c_re(inout[51 * iostride]); + tmp349 = c_im(inout[51 * iostride]); + tmp346 = c_re(W[50]); + tmp348 = c_im(W[50]); + tmp350 = (tmp346 * tmp347) - (tmp348 * tmp349); + tmp555 = (tmp348 * tmp347) + (tmp346 * tmp349); + } + tmp351 = tmp345 + tmp350; + tmp551 = tmp345 - tmp350; + tmp556 = tmp554 - tmp555; + tmp865 = tmp554 + tmp555; + } + { + fftw_real tmp357; + fftw_real tmp559; + fftw_real tmp362; + fftw_real tmp560; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp354; + fftw_real tmp356; + fftw_real tmp353; + fftw_real tmp355; + ASSERT_ALIGNED_DOUBLE; + tmp354 = c_re(inout[59 * iostride]); + tmp356 = c_im(inout[59 * iostride]); + tmp353 = c_re(W[58]); + tmp355 = c_im(W[58]); + tmp357 = (tmp353 * tmp354) - (tmp355 * tmp356); + tmp559 = (tmp355 * tmp354) + (tmp353 * tmp356); + } + { + fftw_real tmp359; + fftw_real tmp361; + fftw_real tmp358; + fftw_real tmp360; + ASSERT_ALIGNED_DOUBLE; + tmp359 = c_re(inout[27 * iostride]); + tmp361 = c_im(inout[27 * iostride]); + tmp358 = c_re(W[26]); + tmp360 = c_im(W[26]); + tmp362 = (tmp358 * tmp359) - (tmp360 * tmp361); + tmp560 = (tmp360 * tmp359) + (tmp358 * tmp361); + } + tmp363 = tmp357 + tmp362; + tmp564 = tmp357 - tmp362; + tmp561 = tmp559 - tmp560; + tmp870 = tmp559 + tmp560; + } + { + fftw_real tmp352; + fftw_real tmp375; + fftw_real tmp866; + fftw_real tmp867; + ASSERT_ALIGNED_DOUBLE; + tmp352 = tmp340 + tmp351; + tmp375 = tmp363 + tmp374; + tmp376 = tmp352 + tmp375; + tmp991 = tmp375 - tmp352; + tmp866 = tmp864 - tmp865; + tmp867 = tmp340 - tmp351; + tmp868 = tmp866 - tmp867; + tmp882 = tmp867 + tmp866; + } + { + fftw_real tmp984; + fftw_real tmp985; + fftw_real tmp869; + fftw_real tmp872; + ASSERT_ALIGNED_DOUBLE; + tmp984 = tmp864 + tmp865; + tmp985 = tmp870 + tmp871; + tmp986 = tmp984 - tmp985; + tmp1027 = tmp984 + tmp985; + tmp869 = tmp363 - tmp374; + tmp872 = tmp870 - tmp871; + tmp873 = tmp869 + tmp872; + tmp881 = tmp869 - tmp872; + } + { + fftw_real tmp552; + 
fftw_real tmp557; + fftw_real tmp706; + fftw_real tmp707; + ASSERT_ALIGNED_DOUBLE; + tmp552 = tmp550 + tmp551; + tmp557 = tmp553 - tmp556; + tmp558 = (K382683432 * tmp552) - (K923879532 * tmp557); + tmp582 = (K923879532 * tmp552) + (K382683432 * tmp557); + tmp706 = tmp550 - tmp551; + tmp707 = tmp553 + tmp556; + tmp708 = (K923879532 * tmp706) - (K382683432 * tmp707); + tmp718 = (K382683432 * tmp706) + (K923879532 * tmp707); + } + { + fftw_real tmp563; + fftw_real tmp568; + fftw_real tmp709; + fftw_real tmp710; + ASSERT_ALIGNED_DOUBLE; + tmp563 = tmp561 + tmp562; + tmp568 = tmp564 - tmp567; + tmp569 = (K382683432 * tmp563) + (K923879532 * tmp568); + tmp581 = (K382683432 * tmp568) - (K923879532 * tmp563); + tmp709 = tmp561 - tmp562; + tmp710 = tmp564 + tmp567; + tmp711 = (K923879532 * tmp709) + (K382683432 * tmp710); + tmp717 = (K923879532 * tmp710) - (K382683432 * tmp709); + } + } + { + fftw_real tmp77; + fftw_real tmp412; + fftw_real tmp409; + fftw_real tmp802; + fftw_real tmp88; + fftw_real tmp410; + fftw_real tmp415; + fftw_real tmp803; + fftw_real tmp801; + fftw_real tmp804; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp71; + fftw_real tmp407; + fftw_real tmp76; + fftw_real tmp408; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp68; + fftw_real tmp70; + fftw_real tmp67; + fftw_real tmp69; + ASSERT_ALIGNED_DOUBLE; + tmp68 = c_re(inout[60 * iostride]); + tmp70 = c_im(inout[60 * iostride]); + tmp67 = c_re(W[59]); + tmp69 = c_im(W[59]); + tmp71 = (tmp67 * tmp68) - (tmp69 * tmp70); + tmp407 = (tmp69 * tmp68) + (tmp67 * tmp70); + } + { + fftw_real tmp73; + fftw_real tmp75; + fftw_real tmp72; + fftw_real tmp74; + ASSERT_ALIGNED_DOUBLE; + tmp73 = c_re(inout[28 * iostride]); + tmp75 = c_im(inout[28 * iostride]); + tmp72 = c_re(W[27]); + tmp74 = c_im(W[27]); + tmp76 = (tmp72 * tmp73) - (tmp74 * tmp75); + tmp408 = (tmp74 * tmp73) + (tmp72 * tmp75); + } + tmp77 = tmp71 + tmp76; + tmp412 = tmp71 - tmp76; + tmp409 = tmp407 - tmp408; + tmp802 = tmp407 + tmp408; + } + { + fftw_real 
tmp82; + fftw_real tmp413; + fftw_real tmp87; + fftw_real tmp414; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp79; + fftw_real tmp81; + fftw_real tmp78; + fftw_real tmp80; + ASSERT_ALIGNED_DOUBLE; + tmp79 = c_re(inout[12 * iostride]); + tmp81 = c_im(inout[12 * iostride]); + tmp78 = c_re(W[11]); + tmp80 = c_im(W[11]); + tmp82 = (tmp78 * tmp79) - (tmp80 * tmp81); + tmp413 = (tmp80 * tmp79) + (tmp78 * tmp81); + } + { + fftw_real tmp84; + fftw_real tmp86; + fftw_real tmp83; + fftw_real tmp85; + ASSERT_ALIGNED_DOUBLE; + tmp84 = c_re(inout[44 * iostride]); + tmp86 = c_im(inout[44 * iostride]); + tmp83 = c_re(W[43]); + tmp85 = c_im(W[43]); + tmp87 = (tmp83 * tmp84) - (tmp85 * tmp86); + tmp414 = (tmp85 * tmp84) + (tmp83 * tmp86); + } + tmp88 = tmp82 + tmp87; + tmp410 = tmp82 - tmp87; + tmp415 = tmp413 - tmp414; + tmp803 = tmp413 + tmp414; + } + tmp89 = tmp77 + tmp88; + tmp801 = tmp77 - tmp88; + tmp804 = tmp802 - tmp803; + tmp805 = tmp801 + tmp804; + tmp909 = tmp801 - tmp804; + tmp957 = tmp802 + tmp803; + { + fftw_real tmp411; + fftw_real tmp416; + fftw_real tmp663; + fftw_real tmp664; + ASSERT_ALIGNED_DOUBLE; + tmp411 = tmp409 + tmp410; + tmp416 = tmp412 - tmp415; + tmp417 = (K382683432 * tmp411) + (K923879532 * tmp416); + tmp609 = (K382683432 * tmp416) - (K923879532 * tmp411); + tmp663 = tmp409 - tmp410; + tmp664 = tmp412 + tmp415; + tmp665 = (K923879532 * tmp663) + (K382683432 * tmp664); + tmp745 = (K923879532 * tmp664) - (K382683432 * tmp663); + } + } + { + fftw_real tmp143; + fftw_real tmp447; + fftw_real tmp148; + fftw_real tmp448; + fftw_real tmp149; + fftw_real tmp819; + fftw_real tmp183; + fftw_real tmp452; + fftw_real tmp455; + fftw_real tmp826; + fftw_real tmp154; + fftw_real tmp465; + fftw_real tmp159; + fftw_real tmp466; + fftw_real tmp160; + fftw_real tmp820; + fftw_real tmp172; + fftw_real tmp457; + fftw_real tmp460; + fftw_real tmp825; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp140; + fftw_real tmp142; + fftw_real tmp139; + fftw_real tmp141; + 
ASSERT_ALIGNED_DOUBLE; + tmp140 = c_re(inout[62 * iostride]); + tmp142 = c_im(inout[62 * iostride]); + tmp139 = c_re(W[61]); + tmp141 = c_im(W[61]); + tmp143 = (tmp139 * tmp140) - (tmp141 * tmp142); + tmp447 = (tmp141 * tmp140) + (tmp139 * tmp142); + } + { + fftw_real tmp145; + fftw_real tmp147; + fftw_real tmp144; + fftw_real tmp146; + ASSERT_ALIGNED_DOUBLE; + tmp145 = c_re(inout[30 * iostride]); + tmp147 = c_im(inout[30 * iostride]); + tmp144 = c_re(W[29]); + tmp146 = c_im(W[29]); + tmp148 = (tmp144 * tmp145) - (tmp146 * tmp147); + tmp448 = (tmp146 * tmp145) + (tmp144 * tmp147); + } + tmp149 = tmp143 + tmp148; + tmp819 = tmp447 + tmp448; + { + fftw_real tmp177; + fftw_real tmp453; + fftw_real tmp182; + fftw_real tmp454; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp174; + fftw_real tmp176; + fftw_real tmp173; + fftw_real tmp175; + ASSERT_ALIGNED_DOUBLE; + tmp174 = c_re(inout[54 * iostride]); + tmp176 = c_im(inout[54 * iostride]); + tmp173 = c_re(W[53]); + tmp175 = c_im(W[53]); + tmp177 = (tmp173 * tmp174) - (tmp175 * tmp176); + tmp453 = (tmp175 * tmp174) + (tmp173 * tmp176); + } + { + fftw_real tmp179; + fftw_real tmp181; + fftw_real tmp178; + fftw_real tmp180; + ASSERT_ALIGNED_DOUBLE; + tmp179 = c_re(inout[22 * iostride]); + tmp181 = c_im(inout[22 * iostride]); + tmp178 = c_re(W[21]); + tmp180 = c_im(W[21]); + tmp182 = (tmp178 * tmp179) - (tmp180 * tmp181); + tmp454 = (tmp180 * tmp179) + (tmp178 * tmp181); + } + tmp183 = tmp177 + tmp182; + tmp452 = tmp177 - tmp182; + tmp455 = tmp453 - tmp454; + tmp826 = tmp453 + tmp454; + } + { + fftw_real tmp151; + fftw_real tmp153; + fftw_real tmp150; + fftw_real tmp152; + ASSERT_ALIGNED_DOUBLE; + tmp151 = c_re(inout[14 * iostride]); + tmp153 = c_im(inout[14 * iostride]); + tmp150 = c_re(W[13]); + tmp152 = c_im(W[13]); + tmp154 = (tmp150 * tmp151) - (tmp152 * tmp153); + tmp465 = (tmp152 * tmp151) + (tmp150 * tmp153); + } + { + fftw_real tmp156; + fftw_real tmp158; + fftw_real tmp155; + fftw_real tmp157; + ASSERT_ALIGNED_DOUBLE; 
+ tmp156 = c_re(inout[46 * iostride]); + tmp158 = c_im(inout[46 * iostride]); + tmp155 = c_re(W[45]); + tmp157 = c_im(W[45]); + tmp159 = (tmp155 * tmp156) - (tmp157 * tmp158); + tmp466 = (tmp157 * tmp156) + (tmp155 * tmp158); + } + tmp160 = tmp154 + tmp159; + tmp820 = tmp465 + tmp466; + { + fftw_real tmp166; + fftw_real tmp458; + fftw_real tmp171; + fftw_real tmp459; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp163; + fftw_real tmp165; + fftw_real tmp162; + fftw_real tmp164; + ASSERT_ALIGNED_DOUBLE; + tmp163 = c_re(inout[6 * iostride]); + tmp165 = c_im(inout[6 * iostride]); + tmp162 = c_re(W[5]); + tmp164 = c_im(W[5]); + tmp166 = (tmp162 * tmp163) - (tmp164 * tmp165); + tmp458 = (tmp164 * tmp163) + (tmp162 * tmp165); + } + { + fftw_real tmp168; + fftw_real tmp170; + fftw_real tmp167; + fftw_real tmp169; + ASSERT_ALIGNED_DOUBLE; + tmp168 = c_re(inout[38 * iostride]); + tmp170 = c_im(inout[38 * iostride]); + tmp167 = c_re(W[37]); + tmp169 = c_im(W[37]); + tmp171 = (tmp167 * tmp168) - (tmp169 * tmp170); + tmp459 = (tmp169 * tmp168) + (tmp167 * tmp170); + } + tmp172 = tmp166 + tmp171; + tmp457 = tmp166 - tmp171; + tmp460 = tmp458 - tmp459; + tmp825 = tmp458 + tmp459; + } + { + fftw_real tmp821; + fftw_real tmp822; + fftw_real tmp824; + fftw_real tmp827; + ASSERT_ALIGNED_DOUBLE; + tmp161 = tmp149 + tmp160; + tmp184 = tmp172 + tmp183; + tmp965 = tmp161 - tmp184; + tmp821 = tmp819 - tmp820; + tmp822 = tmp183 - tmp172; + tmp823 = tmp821 - tmp822; + tmp915 = tmp821 + tmp822; + tmp966 = tmp819 + tmp820; + tmp967 = tmp825 + tmp826; + tmp968 = tmp966 - tmp967; + tmp824 = tmp149 - tmp160; + tmp827 = tmp825 - tmp826; + tmp828 = tmp824 - tmp827; + tmp916 = tmp824 + tmp827; + } + { + fftw_real tmp449; + fftw_real tmp450; + fftw_real tmp464; + fftw_real tmp467; + ASSERT_ALIGNED_DOUBLE; + tmp449 = tmp447 - tmp448; + tmp450 = tmp154 - tmp159; + tmp451 = tmp449 + tmp450; + tmp678 = tmp449 - tmp450; + tmp464 = tmp143 - tmp148; + tmp467 = tmp465 - tmp466; + tmp468 = tmp464 - tmp467; + 
tmp675 = tmp464 + tmp467; + } + { + fftw_real tmp469; + fftw_real tmp470; + fftw_real tmp456; + fftw_real tmp461; + ASSERT_ALIGNED_DOUBLE; + tmp469 = tmp460 - tmp457; + tmp470 = tmp452 + tmp455; + tmp471 = K707106781 * (tmp469 - tmp470); + tmp679 = K707106781 * (tmp469 + tmp470); + tmp456 = tmp452 - tmp455; + tmp461 = tmp457 + tmp460; + tmp462 = K707106781 * (tmp456 - tmp461); + tmp676 = K707106781 * (tmp461 + tmp456); + } + } + { + fftw_real tmp96; + fftw_real tmp420; + fftw_real tmp101; + fftw_real tmp421; + fftw_real tmp102; + fftw_real tmp808; + fftw_real tmp136; + fftw_real tmp425; + fftw_real tmp428; + fftw_real tmp815; + fftw_real tmp107; + fftw_real tmp438; + fftw_real tmp112; + fftw_real tmp439; + fftw_real tmp113; + fftw_real tmp809; + fftw_real tmp125; + fftw_real tmp430; + fftw_real tmp433; + fftw_real tmp814; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp93; + fftw_real tmp95; + fftw_real tmp92; + fftw_real tmp94; + ASSERT_ALIGNED_DOUBLE; + tmp93 = c_re(inout[2 * iostride]); + tmp95 = c_im(inout[2 * iostride]); + tmp92 = c_re(W[1]); + tmp94 = c_im(W[1]); + tmp96 = (tmp92 * tmp93) - (tmp94 * tmp95); + tmp420 = (tmp94 * tmp93) + (tmp92 * tmp95); + } + { + fftw_real tmp98; + fftw_real tmp100; + fftw_real tmp97; + fftw_real tmp99; + ASSERT_ALIGNED_DOUBLE; + tmp98 = c_re(inout[34 * iostride]); + tmp100 = c_im(inout[34 * iostride]); + tmp97 = c_re(W[33]); + tmp99 = c_im(W[33]); + tmp101 = (tmp97 * tmp98) - (tmp99 * tmp100); + tmp421 = (tmp99 * tmp98) + (tmp97 * tmp100); + } + tmp102 = tmp96 + tmp101; + tmp808 = tmp420 + tmp421; + { + fftw_real tmp130; + fftw_real tmp426; + fftw_real tmp135; + fftw_real tmp427; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp127; + fftw_real tmp129; + fftw_real tmp126; + fftw_real tmp128; + ASSERT_ALIGNED_DOUBLE; + tmp127 = c_re(inout[58 * iostride]); + tmp129 = c_im(inout[58 * iostride]); + tmp126 = c_re(W[57]); + tmp128 = c_im(W[57]); + tmp130 = (tmp126 * tmp127) - (tmp128 * tmp129); + tmp426 = (tmp128 * tmp127) + (tmp126 * 
tmp129); + } + { + fftw_real tmp132; + fftw_real tmp134; + fftw_real tmp131; + fftw_real tmp133; + ASSERT_ALIGNED_DOUBLE; + tmp132 = c_re(inout[26 * iostride]); + tmp134 = c_im(inout[26 * iostride]); + tmp131 = c_re(W[25]); + tmp133 = c_im(W[25]); + tmp135 = (tmp131 * tmp132) - (tmp133 * tmp134); + tmp427 = (tmp133 * tmp132) + (tmp131 * tmp134); + } + tmp136 = tmp130 + tmp135; + tmp425 = tmp130 - tmp135; + tmp428 = tmp426 - tmp427; + tmp815 = tmp426 + tmp427; + } + { + fftw_real tmp104; + fftw_real tmp106; + fftw_real tmp103; + fftw_real tmp105; + ASSERT_ALIGNED_DOUBLE; + tmp104 = c_re(inout[18 * iostride]); + tmp106 = c_im(inout[18 * iostride]); + tmp103 = c_re(W[17]); + tmp105 = c_im(W[17]); + tmp107 = (tmp103 * tmp104) - (tmp105 * tmp106); + tmp438 = (tmp105 * tmp104) + (tmp103 * tmp106); + } + { + fftw_real tmp109; + fftw_real tmp111; + fftw_real tmp108; + fftw_real tmp110; + ASSERT_ALIGNED_DOUBLE; + tmp109 = c_re(inout[50 * iostride]); + tmp111 = c_im(inout[50 * iostride]); + tmp108 = c_re(W[49]); + tmp110 = c_im(W[49]); + tmp112 = (tmp108 * tmp109) - (tmp110 * tmp111); + tmp439 = (tmp110 * tmp109) + (tmp108 * tmp111); + } + tmp113 = tmp107 + tmp112; + tmp809 = tmp438 + tmp439; + { + fftw_real tmp119; + fftw_real tmp431; + fftw_real tmp124; + fftw_real tmp432; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp116; + fftw_real tmp118; + fftw_real tmp115; + fftw_real tmp117; + ASSERT_ALIGNED_DOUBLE; + tmp116 = c_re(inout[10 * iostride]); + tmp118 = c_im(inout[10 * iostride]); + tmp115 = c_re(W[9]); + tmp117 = c_im(W[9]); + tmp119 = (tmp115 * tmp116) - (tmp117 * tmp118); + tmp431 = (tmp117 * tmp116) + (tmp115 * tmp118); + } + { + fftw_real tmp121; + fftw_real tmp123; + fftw_real tmp120; + fftw_real tmp122; + ASSERT_ALIGNED_DOUBLE; + tmp121 = c_re(inout[42 * iostride]); + tmp123 = c_im(inout[42 * iostride]); + tmp120 = c_re(W[41]); + tmp122 = c_im(W[41]); + tmp124 = (tmp120 * tmp121) - (tmp122 * tmp123); + tmp432 = (tmp122 * tmp121) + (tmp120 * tmp123); + } + tmp125 = 
tmp119 + tmp124; + tmp430 = tmp119 - tmp124; + tmp433 = tmp431 - tmp432; + tmp814 = tmp431 + tmp432; + } + { + fftw_real tmp810; + fftw_real tmp811; + fftw_real tmp813; + fftw_real tmp816; + ASSERT_ALIGNED_DOUBLE; + tmp114 = tmp102 + tmp113; + tmp137 = tmp125 + tmp136; + tmp963 = tmp114 - tmp137; + tmp810 = tmp808 - tmp809; + tmp811 = tmp136 - tmp125; + tmp812 = tmp810 - tmp811; + tmp912 = tmp810 + tmp811; + tmp960 = tmp808 + tmp809; + tmp961 = tmp814 + tmp815; + tmp962 = tmp960 - tmp961; + tmp813 = tmp102 - tmp113; + tmp816 = tmp814 - tmp815; + tmp817 = tmp813 - tmp816; + tmp913 = tmp813 + tmp816; + } + { + fftw_real tmp422; + fftw_real tmp423; + fftw_real tmp437; + fftw_real tmp440; + ASSERT_ALIGNED_DOUBLE; + tmp422 = tmp420 - tmp421; + tmp423 = tmp107 - tmp112; + tmp424 = tmp422 + tmp423; + tmp668 = tmp422 - tmp423; + tmp437 = tmp96 - tmp101; + tmp440 = tmp438 - tmp439; + tmp441 = tmp437 - tmp440; + tmp671 = tmp437 + tmp440; + } + { + fftw_real tmp442; + fftw_real tmp443; + fftw_real tmp429; + fftw_real tmp434; + ASSERT_ALIGNED_DOUBLE; + tmp442 = tmp433 - tmp430; + tmp443 = tmp425 + tmp428; + tmp444 = K707106781 * (tmp442 - tmp443); + tmp669 = K707106781 * (tmp442 + tmp443); + tmp429 = tmp425 - tmp428; + tmp434 = tmp430 + tmp433; + tmp435 = K707106781 * (tmp429 - tmp434); + tmp672 = K707106781 * (tmp434 + tmp429); + } + } + { + fftw_real tmp192; + fftw_real tmp476; + fftw_real tmp197; + fftw_real tmp477; + fftw_real tmp198; + fftw_real tmp832; + fftw_real tmp232; + fftw_real tmp481; + fftw_real tmp484; + fftw_real tmp851; + fftw_real tmp203; + fftw_real tmp518; + fftw_real tmp208; + fftw_real tmp519; + fftw_real tmp209; + fftw_real tmp833; + fftw_real tmp221; + fftw_real tmp486; + fftw_real tmp489; + fftw_real tmp850; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp189; + fftw_real tmp191; + fftw_real tmp188; + fftw_real tmp190; + ASSERT_ALIGNED_DOUBLE; + tmp189 = c_re(inout[iostride]); + tmp191 = c_im(inout[iostride]); + tmp188 = c_re(W[0]); + tmp190 = c_im(W[0]); 
+ tmp192 = (tmp188 * tmp189) - (tmp190 * tmp191); + tmp476 = (tmp190 * tmp189) + (tmp188 * tmp191); + } + { + fftw_real tmp194; + fftw_real tmp196; + fftw_real tmp193; + fftw_real tmp195; + ASSERT_ALIGNED_DOUBLE; + tmp194 = c_re(inout[33 * iostride]); + tmp196 = c_im(inout[33 * iostride]); + tmp193 = c_re(W[32]); + tmp195 = c_im(W[32]); + tmp197 = (tmp193 * tmp194) - (tmp195 * tmp196); + tmp477 = (tmp195 * tmp194) + (tmp193 * tmp196); + } + tmp198 = tmp192 + tmp197; + tmp832 = tmp476 + tmp477; + { + fftw_real tmp226; + fftw_real tmp482; + fftw_real tmp231; + fftw_real tmp483; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp223; + fftw_real tmp225; + fftw_real tmp222; + fftw_real tmp224; + ASSERT_ALIGNED_DOUBLE; + tmp223 = c_re(inout[57 * iostride]); + tmp225 = c_im(inout[57 * iostride]); + tmp222 = c_re(W[56]); + tmp224 = c_im(W[56]); + tmp226 = (tmp222 * tmp223) - (tmp224 * tmp225); + tmp482 = (tmp224 * tmp223) + (tmp222 * tmp225); + } + { + fftw_real tmp228; + fftw_real tmp230; + fftw_real tmp227; + fftw_real tmp229; + ASSERT_ALIGNED_DOUBLE; + tmp228 = c_re(inout[25 * iostride]); + tmp230 = c_im(inout[25 * iostride]); + tmp227 = c_re(W[24]); + tmp229 = c_im(W[24]); + tmp231 = (tmp227 * tmp228) - (tmp229 * tmp230); + tmp483 = (tmp229 * tmp228) + (tmp227 * tmp230); + } + tmp232 = tmp226 + tmp231; + tmp481 = tmp226 - tmp231; + tmp484 = tmp482 - tmp483; + tmp851 = tmp482 + tmp483; + } + { + fftw_real tmp200; + fftw_real tmp202; + fftw_real tmp199; + fftw_real tmp201; + ASSERT_ALIGNED_DOUBLE; + tmp200 = c_re(inout[17 * iostride]); + tmp202 = c_im(inout[17 * iostride]); + tmp199 = c_re(W[16]); + tmp201 = c_im(W[16]); + tmp203 = (tmp199 * tmp200) - (tmp201 * tmp202); + tmp518 = (tmp201 * tmp200) + (tmp199 * tmp202); + } + { + fftw_real tmp205; + fftw_real tmp207; + fftw_real tmp204; + fftw_real tmp206; + ASSERT_ALIGNED_DOUBLE; + tmp205 = c_re(inout[49 * iostride]); + tmp207 = c_im(inout[49 * iostride]); + tmp204 = c_re(W[48]); + tmp206 = c_im(W[48]); + tmp208 = (tmp204 * 
tmp205) - (tmp206 * tmp207); + tmp519 = (tmp206 * tmp205) + (tmp204 * tmp207); + } + tmp209 = tmp203 + tmp208; + tmp833 = tmp518 + tmp519; + { + fftw_real tmp215; + fftw_real tmp487; + fftw_real tmp220; + fftw_real tmp488; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp212; + fftw_real tmp214; + fftw_real tmp211; + fftw_real tmp213; + ASSERT_ALIGNED_DOUBLE; + tmp212 = c_re(inout[9 * iostride]); + tmp214 = c_im(inout[9 * iostride]); + tmp211 = c_re(W[8]); + tmp213 = c_im(W[8]); + tmp215 = (tmp211 * tmp212) - (tmp213 * tmp214); + tmp487 = (tmp213 * tmp212) + (tmp211 * tmp214); + } + { + fftw_real tmp217; + fftw_real tmp219; + fftw_real tmp216; + fftw_real tmp218; + ASSERT_ALIGNED_DOUBLE; + tmp217 = c_re(inout[41 * iostride]); + tmp219 = c_im(inout[41 * iostride]); + tmp216 = c_re(W[40]); + tmp218 = c_im(W[40]); + tmp220 = (tmp216 * tmp217) - (tmp218 * tmp219); + tmp488 = (tmp218 * tmp217) + (tmp216 * tmp219); + } + tmp221 = tmp215 + tmp220; + tmp486 = tmp215 - tmp220; + tmp489 = tmp487 - tmp488; + tmp850 = tmp487 + tmp488; + } + { + fftw_real tmp210; + fftw_real tmp233; + fftw_real tmp834; + fftw_real tmp835; + ASSERT_ALIGNED_DOUBLE; + tmp210 = tmp198 + tmp209; + tmp233 = tmp221 + tmp232; + tmp234 = tmp210 + tmp233; + tmp977 = tmp210 - tmp233; + tmp834 = tmp832 - tmp833; + tmp835 = tmp232 - tmp221; + tmp836 = tmp834 - tmp835; + tmp923 = tmp834 + tmp835; + } + { + fftw_real tmp972; + fftw_real tmp973; + fftw_real tmp849; + fftw_real tmp852; + ASSERT_ALIGNED_DOUBLE; + tmp972 = tmp832 + tmp833; + tmp973 = tmp850 + tmp851; + tmp974 = tmp972 - tmp973; + tmp1021 = tmp972 + tmp973; + tmp849 = tmp198 - tmp209; + tmp852 = tmp850 - tmp851; + tmp853 = tmp849 - tmp852; + tmp920 = tmp849 + tmp852; + } + { + fftw_real tmp478; + fftw_real tmp479; + fftw_real tmp517; + fftw_real tmp520; + ASSERT_ALIGNED_DOUBLE; + tmp478 = tmp476 - tmp477; + tmp479 = tmp203 - tmp208; + tmp480 = tmp478 + tmp479; + tmp684 = tmp478 - tmp479; + tmp517 = tmp192 - tmp197; + tmp520 = tmp518 - tmp519; + tmp521 
= tmp517 - tmp520; + tmp695 = tmp517 + tmp520; + } + { + fftw_real tmp522; + fftw_real tmp523; + fftw_real tmp485; + fftw_real tmp490; + ASSERT_ALIGNED_DOUBLE; + tmp522 = tmp489 - tmp486; + tmp523 = tmp481 + tmp484; + tmp524 = K707106781 * (tmp522 - tmp523); + tmp685 = K707106781 * (tmp522 + tmp523); + tmp485 = tmp481 - tmp484; + tmp490 = tmp486 + tmp489; + tmp491 = K707106781 * (tmp485 - tmp490); + tmp696 = K707106781 * (tmp490 + tmp485); + } + } + { + fftw_real tmp245; + fftw_real tmp509; + fftw_real tmp506; + fftw_real tmp843; + fftw_real tmp279; + fftw_real tmp501; + fftw_real tmp496; + fftw_real tmp839; + fftw_real tmp256; + fftw_real tmp507; + fftw_real tmp512; + fftw_real tmp844; + fftw_real tmp268; + fftw_real tmp493; + fftw_real tmp500; + fftw_real tmp838; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp239; + fftw_real tmp504; + fftw_real tmp244; + fftw_real tmp505; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp236; + fftw_real tmp238; + fftw_real tmp235; + fftw_real tmp237; + ASSERT_ALIGNED_DOUBLE; + tmp236 = c_re(inout[5 * iostride]); + tmp238 = c_im(inout[5 * iostride]); + tmp235 = c_re(W[4]); + tmp237 = c_im(W[4]); + tmp239 = (tmp235 * tmp236) - (tmp237 * tmp238); + tmp504 = (tmp237 * tmp236) + (tmp235 * tmp238); + } + { + fftw_real tmp241; + fftw_real tmp243; + fftw_real tmp240; + fftw_real tmp242; + ASSERT_ALIGNED_DOUBLE; + tmp241 = c_re(inout[37 * iostride]); + tmp243 = c_im(inout[37 * iostride]); + tmp240 = c_re(W[36]); + tmp242 = c_im(W[36]); + tmp244 = (tmp240 * tmp241) - (tmp242 * tmp243); + tmp505 = (tmp242 * tmp241) + (tmp240 * tmp243); + } + tmp245 = tmp239 + tmp244; + tmp509 = tmp239 - tmp244; + tmp506 = tmp504 - tmp505; + tmp843 = tmp504 + tmp505; + } + { + fftw_real tmp273; + fftw_real tmp494; + fftw_real tmp278; + fftw_real tmp495; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp270; + fftw_real tmp272; + fftw_real tmp269; + fftw_real tmp271; + ASSERT_ALIGNED_DOUBLE; + tmp270 = c_re(inout[13 * iostride]); + tmp272 = c_im(inout[13 * iostride]); + 
tmp269 = c_re(W[12]); + tmp271 = c_im(W[12]); + tmp273 = (tmp269 * tmp270) - (tmp271 * tmp272); + tmp494 = (tmp271 * tmp270) + (tmp269 * tmp272); + } + { + fftw_real tmp275; + fftw_real tmp277; + fftw_real tmp274; + fftw_real tmp276; + ASSERT_ALIGNED_DOUBLE; + tmp275 = c_re(inout[45 * iostride]); + tmp277 = c_im(inout[45 * iostride]); + tmp274 = c_re(W[44]); + tmp276 = c_im(W[44]); + tmp278 = (tmp274 * tmp275) - (tmp276 * tmp277); + tmp495 = (tmp276 * tmp275) + (tmp274 * tmp277); + } + tmp279 = tmp273 + tmp278; + tmp501 = tmp273 - tmp278; + tmp496 = tmp494 - tmp495; + tmp839 = tmp494 + tmp495; + } + { + fftw_real tmp250; + fftw_real tmp510; + fftw_real tmp255; + fftw_real tmp511; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp247; + fftw_real tmp249; + fftw_real tmp246; + fftw_real tmp248; + ASSERT_ALIGNED_DOUBLE; + tmp247 = c_re(inout[21 * iostride]); + tmp249 = c_im(inout[21 * iostride]); + tmp246 = c_re(W[20]); + tmp248 = c_im(W[20]); + tmp250 = (tmp246 * tmp247) - (tmp248 * tmp249); + tmp510 = (tmp248 * tmp247) + (tmp246 * tmp249); + } + { + fftw_real tmp252; + fftw_real tmp254; + fftw_real tmp251; + fftw_real tmp253; + ASSERT_ALIGNED_DOUBLE; + tmp252 = c_re(inout[53 * iostride]); + tmp254 = c_im(inout[53 * iostride]); + tmp251 = c_re(W[52]); + tmp253 = c_im(W[52]); + tmp255 = (tmp251 * tmp252) - (tmp253 * tmp254); + tmp511 = (tmp253 * tmp252) + (tmp251 * tmp254); + } + tmp256 = tmp250 + tmp255; + tmp507 = tmp250 - tmp255; + tmp512 = tmp510 - tmp511; + tmp844 = tmp510 + tmp511; + } + { + fftw_real tmp262; + fftw_real tmp498; + fftw_real tmp267; + fftw_real tmp499; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp259; + fftw_real tmp261; + fftw_real tmp258; + fftw_real tmp260; + ASSERT_ALIGNED_DOUBLE; + tmp259 = c_re(inout[61 * iostride]); + tmp261 = c_im(inout[61 * iostride]); + tmp258 = c_re(W[60]); + tmp260 = c_im(W[60]); + tmp262 = (tmp258 * tmp259) - (tmp260 * tmp261); + tmp498 = (tmp260 * tmp259) + (tmp258 * tmp261); + } + { + fftw_real tmp264; + fftw_real tmp266; 
+ fftw_real tmp263; + fftw_real tmp265; + ASSERT_ALIGNED_DOUBLE; + tmp264 = c_re(inout[29 * iostride]); + tmp266 = c_im(inout[29 * iostride]); + tmp263 = c_re(W[28]); + tmp265 = c_im(W[28]); + tmp267 = (tmp263 * tmp264) - (tmp265 * tmp266); + tmp499 = (tmp265 * tmp264) + (tmp263 * tmp266); + } + tmp268 = tmp262 + tmp267; + tmp493 = tmp262 - tmp267; + tmp500 = tmp498 - tmp499; + tmp838 = tmp498 + tmp499; + } + { + fftw_real tmp257; + fftw_real tmp280; + fftw_real tmp837; + fftw_real tmp840; + ASSERT_ALIGNED_DOUBLE; + tmp257 = tmp245 + tmp256; + tmp280 = tmp268 + tmp279; + tmp281 = tmp257 + tmp280; + tmp975 = tmp280 - tmp257; + tmp837 = tmp268 - tmp279; + tmp840 = tmp838 - tmp839; + tmp841 = tmp837 - tmp840; + tmp855 = tmp837 + tmp840; + } + { + fftw_real tmp978; + fftw_real tmp979; + fftw_real tmp842; + fftw_real tmp845; + ASSERT_ALIGNED_DOUBLE; + tmp978 = tmp843 + tmp844; + tmp979 = tmp838 + tmp839; + tmp980 = tmp978 - tmp979; + tmp1022 = tmp978 + tmp979; + tmp842 = tmp245 - tmp256; + tmp845 = tmp843 - tmp844; + tmp846 = tmp842 + tmp845; + tmp854 = tmp845 - tmp842; + } + { + fftw_real tmp497; + fftw_real tmp502; + fftw_real tmp687; + fftw_real tmp688; + ASSERT_ALIGNED_DOUBLE; + tmp497 = tmp493 - tmp496; + tmp502 = tmp500 + tmp501; + tmp503 = (K382683432 * tmp497) - (K923879532 * tmp502); + tmp527 = (K382683432 * tmp502) + (K923879532 * tmp497); + tmp687 = tmp493 + tmp496; + tmp688 = tmp500 - tmp501; + tmp689 = (K923879532 * tmp687) - (K382683432 * tmp688); + tmp699 = (K923879532 * tmp688) + (K382683432 * tmp687); + } + { + fftw_real tmp508; + fftw_real tmp513; + fftw_real tmp690; + fftw_real tmp691; + ASSERT_ALIGNED_DOUBLE; + tmp508 = tmp506 + tmp507; + tmp513 = tmp509 - tmp512; + tmp514 = (K923879532 * tmp508) + (K382683432 * tmp513); + tmp526 = (K382683432 * tmp508) - (K923879532 * tmp513); + tmp690 = tmp506 - tmp507; + tmp691 = tmp509 + tmp512; + tmp692 = (K382683432 * tmp690) + (K923879532 * tmp691); + tmp698 = (K923879532 * tmp690) - (K382683432 * tmp691); + } 
+ } + { + fftw_real tmp91; + fftw_real tmp1015; + fftw_real tmp1038; + fftw_real tmp1039; + fftw_real tmp1049; + fftw_real tmp1055; + fftw_real tmp186; + fftw_real tmp1054; + fftw_real tmp1024; + fftw_real tmp1032; + fftw_real tmp378; + fftw_real tmp1051; + fftw_real tmp1029; + fftw_real tmp1033; + fftw_real tmp1018; + fftw_real tmp1040; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp43; + fftw_real tmp90; + fftw_real tmp1036; + fftw_real tmp1037; + ASSERT_ALIGNED_DOUBLE; + tmp43 = tmp19 + tmp42; + tmp90 = tmp66 + tmp89; + tmp91 = tmp43 + tmp90; + tmp1015 = tmp43 - tmp90; + tmp1036 = tmp1021 + tmp1022; + tmp1037 = tmp1026 + tmp1027; + tmp1038 = tmp1036 - tmp1037; + tmp1039 = tmp1036 + tmp1037; + } + { + fftw_real tmp1041; + fftw_real tmp1048; + fftw_real tmp138; + fftw_real tmp185; + ASSERT_ALIGNED_DOUBLE; + tmp1041 = tmp956 + tmp957; + tmp1048 = tmp1042 + tmp1047; + tmp1049 = tmp1041 + tmp1048; + tmp1055 = tmp1048 - tmp1041; + tmp138 = tmp114 + tmp137; + tmp185 = tmp161 + tmp184; + tmp186 = tmp138 + tmp185; + tmp1054 = tmp185 - tmp138; + } + { + fftw_real tmp1020; + fftw_real tmp1023; + fftw_real tmp282; + fftw_real tmp377; + ASSERT_ALIGNED_DOUBLE; + tmp1020 = tmp234 - tmp281; + tmp1023 = tmp1021 - tmp1022; + tmp1024 = tmp1020 + tmp1023; + tmp1032 = tmp1023 - tmp1020; + tmp282 = tmp234 + tmp281; + tmp377 = tmp329 + tmp376; + tmp378 = tmp282 + tmp377; + tmp1051 = tmp377 - tmp282; + } + { + fftw_real tmp1025; + fftw_real tmp1028; + fftw_real tmp1016; + fftw_real tmp1017; + ASSERT_ALIGNED_DOUBLE; + tmp1025 = tmp329 - tmp376; + tmp1028 = tmp1026 - tmp1027; + tmp1029 = tmp1025 - tmp1028; + tmp1033 = tmp1025 + tmp1028; + tmp1016 = tmp960 + tmp961; + tmp1017 = tmp966 + tmp967; + tmp1018 = tmp1016 - tmp1017; + tmp1040 = tmp1016 + tmp1017; + } + { + fftw_real tmp187; + fftw_real tmp1035; + fftw_real tmp1050; + fftw_real tmp1052; + ASSERT_ALIGNED_DOUBLE; + tmp187 = tmp91 + tmp186; + c_re(inout[32 * iostride]) = tmp187 - tmp378; + c_re(inout[0]) = tmp187 + tmp378; + tmp1035 = 
tmp91 - tmp186; + c_re(inout[48 * iostride]) = tmp1035 - tmp1038; + c_re(inout[16 * iostride]) = tmp1035 + tmp1038; + { + fftw_real tmp1019; + fftw_real tmp1030; + fftw_real tmp1057; + fftw_real tmp1058; + ASSERT_ALIGNED_DOUBLE; + tmp1019 = tmp1015 + tmp1018; + tmp1030 = K707106781 * (tmp1024 + tmp1029); + c_re(inout[40 * iostride]) = tmp1019 - tmp1030; + c_re(inout[8 * iostride]) = tmp1019 + tmp1030; + tmp1057 = K707106781 * (tmp1029 - tmp1024); + tmp1058 = tmp1055 - tmp1054; + c_im(inout[24 * iostride]) = tmp1057 + tmp1058; + c_im(inout[56 * iostride]) = tmp1058 - tmp1057; + } + tmp1050 = tmp1040 + tmp1049; + c_im(inout[0]) = tmp1039 + tmp1050; + c_im(inout[32 * iostride]) = tmp1050 - tmp1039; + tmp1052 = tmp1049 - tmp1040; + c_im(inout[16 * iostride]) = tmp1051 + tmp1052; + c_im(inout[48 * iostride]) = tmp1052 - tmp1051; + { + fftw_real tmp1053; + fftw_real tmp1056; + fftw_real tmp1031; + fftw_real tmp1034; + ASSERT_ALIGNED_DOUBLE; + tmp1053 = K707106781 * (tmp1032 + tmp1033); + tmp1056 = tmp1054 + tmp1055; + c_im(inout[8 * iostride]) = tmp1053 + tmp1056; + c_im(inout[40 * iostride]) = tmp1056 - tmp1053; + tmp1031 = tmp1015 - tmp1018; + tmp1034 = K707106781 * (tmp1032 - tmp1033); + c_re(inout[56 * iostride]) = tmp1031 - tmp1034; + c_re(inout[24 * iostride]) = tmp1031 + tmp1034; + } + } + } + { + fftw_real tmp959; + fftw_real tmp999; + fftw_real tmp1002; + fftw_real tmp1068; + fftw_real tmp970; + fftw_real tmp1060; + fftw_real tmp1063; + fftw_real tmp1069; + fftw_real tmp982; + fftw_real tmp996; + fftw_real tmp1006; + fftw_real tmp1012; + fftw_real tmp993; + fftw_real tmp997; + fftw_real tmp1009; + fftw_real tmp1013; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp955; + fftw_real tmp958; + fftw_real tmp1000; + fftw_real tmp1001; + ASSERT_ALIGNED_DOUBLE; + tmp955 = tmp19 - tmp42; + tmp958 = tmp956 - tmp957; + tmp959 = tmp955 - tmp958; + tmp999 = tmp955 + tmp958; + tmp1000 = tmp963 + tmp962; + tmp1001 = tmp965 - tmp968; + tmp1002 = K707106781 * (tmp1000 + tmp1001); + 
tmp1068 = K707106781 * (tmp1001 - tmp1000); + } + { + fftw_real tmp964; + fftw_real tmp969; + fftw_real tmp1061; + fftw_real tmp1062; + ASSERT_ALIGNED_DOUBLE; + tmp964 = tmp962 - tmp963; + tmp969 = tmp965 + tmp968; + tmp970 = K707106781 * (tmp964 - tmp969); + tmp1060 = K707106781 * (tmp964 + tmp969); + tmp1061 = tmp89 - tmp66; + tmp1062 = tmp1047 - tmp1042; + tmp1063 = tmp1061 + tmp1062; + tmp1069 = tmp1062 - tmp1061; + } + { + fftw_real tmp976; + fftw_real tmp981; + fftw_real tmp1004; + fftw_real tmp1005; + ASSERT_ALIGNED_DOUBLE; + tmp976 = tmp974 - tmp975; + tmp981 = tmp977 - tmp980; + tmp982 = (K923879532 * tmp976) + (K382683432 * tmp981); + tmp996 = (K382683432 * tmp976) - (K923879532 * tmp981); + tmp1004 = tmp974 + tmp975; + tmp1005 = tmp977 + tmp980; + tmp1006 = + (K382683432 * tmp1004) + (K923879532 * tmp1005); + tmp1012 = + (K923879532 * tmp1004) - (K382683432 * tmp1005); + } + { + fftw_real tmp987; + fftw_real tmp992; + fftw_real tmp1007; + fftw_real tmp1008; + ASSERT_ALIGNED_DOUBLE; + tmp987 = tmp983 - tmp986; + tmp992 = tmp990 - tmp991; + tmp993 = (K382683432 * tmp987) - (K923879532 * tmp992); + tmp997 = (K382683432 * tmp992) + (K923879532 * tmp987); + tmp1007 = tmp983 + tmp986; + tmp1008 = tmp990 + tmp991; + tmp1009 = + (K923879532 * tmp1007) - (K382683432 * tmp1008); + tmp1013 = + (K923879532 * tmp1008) + (K382683432 * tmp1007); + } + { + fftw_real tmp971; + fftw_real tmp994; + fftw_real tmp995; + fftw_real tmp998; + ASSERT_ALIGNED_DOUBLE; + tmp971 = tmp959 + tmp970; + tmp994 = tmp982 + tmp993; + c_re(inout[44 * iostride]) = tmp971 - tmp994; + c_re(inout[12 * iostride]) = tmp971 + tmp994; + tmp995 = tmp959 - tmp970; + tmp998 = tmp996 - tmp997; + c_re(inout[60 * iostride]) = tmp995 - tmp998; + c_re(inout[28 * iostride]) = tmp995 + tmp998; + } + { + fftw_real tmp1067; + fftw_real tmp1070; + fftw_real tmp1071; + fftw_real tmp1072; + ASSERT_ALIGNED_DOUBLE; + tmp1067 = tmp996 + tmp997; + tmp1070 = tmp1068 + tmp1069; + c_im(inout[12 * iostride]) = tmp1067 + 
tmp1070; + c_im(inout[44 * iostride]) = tmp1070 - tmp1067; + tmp1071 = tmp993 - tmp982; + tmp1072 = tmp1069 - tmp1068; + c_im(inout[28 * iostride]) = tmp1071 + tmp1072; + c_im(inout[60 * iostride]) = tmp1072 - tmp1071; + } + { + fftw_real tmp1003; + fftw_real tmp1010; + fftw_real tmp1011; + fftw_real tmp1014; + ASSERT_ALIGNED_DOUBLE; + tmp1003 = tmp999 + tmp1002; + tmp1010 = tmp1006 + tmp1009; + c_re(inout[36 * iostride]) = tmp1003 - tmp1010; + c_re(inout[4 * iostride]) = tmp1003 + tmp1010; + tmp1011 = tmp999 - tmp1002; + tmp1014 = tmp1012 - tmp1013; + c_re(inout[52 * iostride]) = tmp1011 - tmp1014; + c_re(inout[20 * iostride]) = tmp1011 + tmp1014; + } + { + fftw_real tmp1059; + fftw_real tmp1064; + fftw_real tmp1065; + fftw_real tmp1066; + ASSERT_ALIGNED_DOUBLE; + tmp1059 = tmp1012 + tmp1013; + tmp1064 = tmp1060 + tmp1063; + c_im(inout[4 * iostride]) = tmp1059 + tmp1064; + c_im(inout[36 * iostride]) = tmp1064 - tmp1059; + tmp1065 = tmp1009 - tmp1006; + tmp1066 = tmp1063 - tmp1060; + c_im(inout[20 * iostride]) = tmp1065 + tmp1066; + c_im(inout[52 * iostride]) = tmp1066 - tmp1065; + } + } + { + fftw_real tmp419; + fftw_real tmp591; + fftw_real tmp1155; + fftw_real tmp1161; + fftw_real tmp474; + fftw_real tmp1152; + fftw_real tmp594; + fftw_real tmp1160; + fftw_real tmp530; + fftw_real tmp588; + fftw_real tmp598; + fftw_real tmp604; + fftw_real tmp585; + fftw_real tmp589; + fftw_real tmp601; + fftw_real tmp605; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp395; + fftw_real tmp418; + fftw_real tmp1153; + fftw_real tmp1154; + ASSERT_ALIGNED_DOUBLE; + tmp395 = tmp383 - tmp394; + tmp418 = tmp406 - tmp417; + tmp419 = tmp395 - tmp418; + tmp591 = tmp395 + tmp418; + tmp1153 = tmp609 - tmp608; + tmp1154 = tmp1139 - tmp1138; + tmp1155 = tmp1153 + tmp1154; + tmp1161 = tmp1154 - tmp1153; + } + { + fftw_real tmp446; + fftw_real tmp592; + fftw_real tmp473; + fftw_real tmp593; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp436; + fftw_real tmp445; + fftw_real tmp463; + fftw_real tmp472; 
+ ASSERT_ALIGNED_DOUBLE; + tmp436 = tmp424 - tmp435; + tmp445 = tmp441 - tmp444; + tmp446 = + (K195090322 * tmp436) - (K980785280 * tmp445); + tmp592 = + (K980785280 * tmp436) + (K195090322 * tmp445); + tmp463 = tmp451 - tmp462; + tmp472 = tmp468 - tmp471; + tmp473 = + (K195090322 * tmp463) + (K980785280 * tmp472); + tmp593 = + (K195090322 * tmp472) - (K980785280 * tmp463); + } + tmp474 = tmp446 - tmp473; + tmp1152 = tmp446 + tmp473; + tmp594 = tmp592 + tmp593; + tmp1160 = tmp593 - tmp592; + } + { + fftw_real tmp516; + fftw_real tmp596; + fftw_real tmp529; + fftw_real tmp597; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp492; + fftw_real tmp515; + fftw_real tmp525; + fftw_real tmp528; + ASSERT_ALIGNED_DOUBLE; + tmp492 = tmp480 - tmp491; + tmp515 = tmp503 - tmp514; + tmp516 = tmp492 - tmp515; + tmp596 = tmp492 + tmp515; + tmp525 = tmp521 - tmp524; + tmp528 = tmp526 - tmp527; + tmp529 = tmp525 - tmp528; + tmp597 = tmp525 + tmp528; + } + tmp530 = (K995184726 * tmp516) + (K098017140 * tmp529); + tmp588 = (K098017140 * tmp516) - (K995184726 * tmp529); + tmp598 = (K634393284 * tmp596) + (K773010453 * tmp597); + tmp604 = (K773010453 * tmp596) - (K634393284 * tmp597); + } + { + fftw_real tmp571; + fftw_real tmp599; + fftw_real tmp584; + fftw_real tmp600; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp547; + fftw_real tmp570; + fftw_real tmp580; + fftw_real tmp583; + ASSERT_ALIGNED_DOUBLE; + tmp547 = tmp535 - tmp546; + tmp570 = tmp558 - tmp569; + tmp571 = tmp547 - tmp570; + tmp599 = tmp547 + tmp570; + tmp580 = tmp576 - tmp579; + tmp583 = tmp581 - tmp582; + tmp584 = tmp580 - tmp583; + tmp600 = tmp580 + tmp583; + } + tmp585 = (K098017140 * tmp571) - (K995184726 * tmp584); + tmp589 = (K098017140 * tmp584) + (K995184726 * tmp571); + tmp601 = (K773010453 * tmp599) - (K634393284 * tmp600); + tmp605 = (K773010453 * tmp600) + (K634393284 * tmp599); + } + { + fftw_real tmp475; + fftw_real tmp586; + fftw_real tmp587; + fftw_real tmp590; + ASSERT_ALIGNED_DOUBLE; + tmp475 = tmp419 + tmp474; 
+ tmp586 = tmp530 + tmp585; + c_re(inout[47 * iostride]) = tmp475 - tmp586; + c_re(inout[15 * iostride]) = tmp475 + tmp586; + tmp587 = tmp419 - tmp474; + tmp590 = tmp588 - tmp589; + c_re(inout[63 * iostride]) = tmp587 - tmp590; + c_re(inout[31 * iostride]) = tmp587 + tmp590; + } + { + fftw_real tmp1159; + fftw_real tmp1162; + fftw_real tmp1163; + fftw_real tmp1164; + ASSERT_ALIGNED_DOUBLE; + tmp1159 = tmp588 + tmp589; + tmp1162 = tmp1160 + tmp1161; + c_im(inout[15 * iostride]) = tmp1159 + tmp1162; + c_im(inout[47 * iostride]) = tmp1162 - tmp1159; + tmp1163 = tmp585 - tmp530; + tmp1164 = tmp1161 - tmp1160; + c_im(inout[31 * iostride]) = tmp1163 + tmp1164; + c_im(inout[63 * iostride]) = tmp1164 - tmp1163; + } + { + fftw_real tmp595; + fftw_real tmp602; + fftw_real tmp603; + fftw_real tmp606; + ASSERT_ALIGNED_DOUBLE; + tmp595 = tmp591 + tmp594; + tmp602 = tmp598 + tmp601; + c_re(inout[39 * iostride]) = tmp595 - tmp602; + c_re(inout[7 * iostride]) = tmp595 + tmp602; + tmp603 = tmp591 - tmp594; + tmp606 = tmp604 - tmp605; + c_re(inout[55 * iostride]) = tmp603 - tmp606; + c_re(inout[23 * iostride]) = tmp603 + tmp606; + } + { + fftw_real tmp1151; + fftw_real tmp1156; + fftw_real tmp1157; + fftw_real tmp1158; + ASSERT_ALIGNED_DOUBLE; + tmp1151 = tmp604 + tmp605; + tmp1156 = tmp1152 + tmp1155; + c_im(inout[7 * iostride]) = tmp1151 + tmp1156; + c_im(inout[39 * iostride]) = tmp1156 - tmp1151; + tmp1157 = tmp601 - tmp598; + tmp1158 = tmp1155 - tmp1152; + c_im(inout[23 * iostride]) = tmp1157 + tmp1158; + c_im(inout[55 * iostride]) = tmp1158 - tmp1157; + } + } + { + fftw_real tmp611; + fftw_real tmp639; + fftw_real tmp1141; + fftw_real tmp1147; + fftw_real tmp618; + fftw_real tmp1136; + fftw_real tmp642; + fftw_real tmp1146; + fftw_real tmp626; + fftw_real tmp636; + fftw_real tmp646; + fftw_real tmp652; + fftw_real tmp633; + fftw_real tmp637; + fftw_real tmp649; + fftw_real tmp653; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp607; + fftw_real tmp610; + fftw_real tmp1137; + 
fftw_real tmp1140; + ASSERT_ALIGNED_DOUBLE; + tmp607 = tmp383 + tmp394; + tmp610 = tmp608 + tmp609; + tmp611 = tmp607 - tmp610; + tmp639 = tmp607 + tmp610; + tmp1137 = tmp406 + tmp417; + tmp1140 = tmp1138 + tmp1139; + tmp1141 = tmp1137 + tmp1140; + tmp1147 = tmp1140 - tmp1137; + } + { + fftw_real tmp614; + fftw_real tmp640; + fftw_real tmp617; + fftw_real tmp641; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp612; + fftw_real tmp613; + fftw_real tmp615; + fftw_real tmp616; + ASSERT_ALIGNED_DOUBLE; + tmp612 = tmp424 + tmp435; + tmp613 = tmp441 + tmp444; + tmp614 = + (K831469612 * tmp612) - (K555570233 * tmp613); + tmp640 = + (K555570233 * tmp612) + (K831469612 * tmp613); + tmp615 = tmp451 + tmp462; + tmp616 = tmp468 + tmp471; + tmp617 = + (K831469612 * tmp615) + (K555570233 * tmp616); + tmp641 = + (K831469612 * tmp616) - (K555570233 * tmp615); + } + tmp618 = tmp614 - tmp617; + tmp1136 = tmp614 + tmp617; + tmp642 = tmp640 + tmp641; + tmp1146 = tmp641 - tmp640; + } + { + fftw_real tmp622; + fftw_real tmp644; + fftw_real tmp625; + fftw_real tmp645; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp620; + fftw_real tmp621; + fftw_real tmp623; + fftw_real tmp624; + ASSERT_ALIGNED_DOUBLE; + tmp620 = tmp521 + tmp524; + tmp621 = tmp514 + tmp503; + tmp622 = tmp620 - tmp621; + tmp644 = tmp620 + tmp621; + tmp623 = tmp480 + tmp491; + tmp624 = tmp526 + tmp527; + tmp625 = tmp623 - tmp624; + tmp645 = tmp623 + tmp624; + } + tmp626 = (K471396736 * tmp622) + (K881921264 * tmp625); + tmp636 = (K471396736 * tmp625) - (K881921264 * tmp622); + tmp646 = (K956940335 * tmp644) + (K290284677 * tmp645); + tmp652 = (K956940335 * tmp645) - (K290284677 * tmp644); + } + { + fftw_real tmp629; + fftw_real tmp647; + fftw_real tmp632; + fftw_real tmp648; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp627; + fftw_real tmp628; + fftw_real tmp630; + fftw_real tmp631; + ASSERT_ALIGNED_DOUBLE; + tmp627 = tmp535 + tmp546; + tmp628 = tmp582 + tmp581; + tmp629 = tmp627 - tmp628; + tmp647 = tmp627 + tmp628; + tmp630 = 
tmp576 + tmp579; + tmp631 = tmp558 + tmp569; + tmp632 = tmp630 - tmp631; + tmp648 = tmp630 + tmp631; + } + tmp633 = (K471396736 * tmp629) - (K881921264 * tmp632); + tmp637 = (K881921264 * tmp629) + (K471396736 * tmp632); + tmp649 = (K956940335 * tmp647) - (K290284677 * tmp648); + tmp653 = (K290284677 * tmp647) + (K956940335 * tmp648); + } + { + fftw_real tmp619; + fftw_real tmp634; + fftw_real tmp635; + fftw_real tmp638; + ASSERT_ALIGNED_DOUBLE; + tmp619 = tmp611 + tmp618; + tmp634 = tmp626 + tmp633; + c_re(inout[43 * iostride]) = tmp619 - tmp634; + c_re(inout[11 * iostride]) = tmp619 + tmp634; + tmp635 = tmp611 - tmp618; + tmp638 = tmp636 - tmp637; + c_re(inout[59 * iostride]) = tmp635 - tmp638; + c_re(inout[27 * iostride]) = tmp635 + tmp638; + } + { + fftw_real tmp1145; + fftw_real tmp1148; + fftw_real tmp1149; + fftw_real tmp1150; + ASSERT_ALIGNED_DOUBLE; + tmp1145 = tmp636 + tmp637; + tmp1148 = tmp1146 + tmp1147; + c_im(inout[11 * iostride]) = tmp1145 + tmp1148; + c_im(inout[43 * iostride]) = tmp1148 - tmp1145; + tmp1149 = tmp633 - tmp626; + tmp1150 = tmp1147 - tmp1146; + c_im(inout[27 * iostride]) = tmp1149 + tmp1150; + c_im(inout[59 * iostride]) = tmp1150 - tmp1149; + } + { + fftw_real tmp643; + fftw_real tmp650; + fftw_real tmp651; + fftw_real tmp654; + ASSERT_ALIGNED_DOUBLE; + tmp643 = tmp639 + tmp642; + tmp650 = tmp646 + tmp649; + c_re(inout[35 * iostride]) = tmp643 - tmp650; + c_re(inout[3 * iostride]) = tmp643 + tmp650; + tmp651 = tmp639 - tmp642; + tmp654 = tmp652 - tmp653; + c_re(inout[51 * iostride]) = tmp651 - tmp654; + c_re(inout[19 * iostride]) = tmp651 + tmp654; + } + { + fftw_real tmp1135; + fftw_real tmp1142; + fftw_real tmp1143; + fftw_real tmp1144; + ASSERT_ALIGNED_DOUBLE; + tmp1135 = tmp652 + tmp653; + tmp1142 = tmp1136 + tmp1141; + c_im(inout[3 * iostride]) = tmp1135 + tmp1142; + c_im(inout[35 * iostride]) = tmp1142 - tmp1135; + tmp1143 = tmp649 - tmp646; + tmp1144 = tmp1141 - tmp1136; + c_im(inout[19 * iostride]) = tmp1143 + tmp1144; + 
c_im(inout[51 * iostride]) = tmp1144 - tmp1143; + } + } + { + fftw_real tmp807; + fftw_real tmp891; + fftw_real tmp830; + fftw_real tmp1090; + fftw_real tmp1093; + fftw_real tmp1099; + fftw_real tmp894; + fftw_real tmp1098; + fftw_real tmp885; + fftw_real tmp889; + fftw_real tmp901; + fftw_real tmp905; + fftw_real tmp858; + fftw_real tmp888; + fftw_real tmp898; + fftw_real tmp904; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp795; + fftw_real tmp806; + fftw_real tmp892; + fftw_real tmp893; + ASSERT_ALIGNED_DOUBLE; + tmp795 = tmp791 - tmp794; + tmp806 = K707106781 * (tmp800 - tmp805); + tmp807 = tmp795 - tmp806; + tmp891 = tmp795 + tmp806; + { + fftw_real tmp818; + fftw_real tmp829; + fftw_real tmp1091; + fftw_real tmp1092; + ASSERT_ALIGNED_DOUBLE; + tmp818 = + (K382683432 * tmp812) - (K923879532 * tmp817); + tmp829 = + (K382683432 * tmp823) + (K923879532 * tmp828); + tmp830 = tmp818 - tmp829; + tmp1090 = tmp818 + tmp829; + tmp1091 = K707106781 * (tmp909 - tmp908); + tmp1092 = tmp1077 - tmp1076; + tmp1093 = tmp1091 + tmp1092; + tmp1099 = tmp1092 - tmp1091; + } + tmp892 = (K923879532 * tmp812) + (K382683432 * tmp817); + tmp893 = (K382683432 * tmp828) - (K923879532 * tmp823); + tmp894 = tmp892 + tmp893; + tmp1098 = tmp893 - tmp892; + { + fftw_real tmp875; + fftw_real tmp899; + fftw_real tmp884; + fftw_real tmp900; + fftw_real tmp874; + fftw_real tmp883; + ASSERT_ALIGNED_DOUBLE; + tmp874 = K707106781 * (tmp868 - tmp873); + tmp875 = tmp863 - tmp874; + tmp899 = tmp863 + tmp874; + tmp883 = K707106781 * (tmp881 - tmp882); + tmp884 = tmp880 - tmp883; + tmp900 = tmp880 + tmp883; + tmp885 = + (K195090322 * tmp875) - (K980785280 * tmp884); + tmp889 = + (K195090322 * tmp884) + (K980785280 * tmp875); + tmp901 = + (K831469612 * tmp899) - (K555570233 * tmp900); + tmp905 = + (K831469612 * tmp900) + (K555570233 * tmp899); + } + { + fftw_real tmp848; + fftw_real tmp896; + fftw_real tmp857; + fftw_real tmp897; + fftw_real tmp847; + fftw_real tmp856; + ASSERT_ALIGNED_DOUBLE; + tmp847 = 
K707106781 * (tmp841 - tmp846); + tmp848 = tmp836 - tmp847; + tmp896 = tmp836 + tmp847; + tmp856 = K707106781 * (tmp854 - tmp855); + tmp857 = tmp853 - tmp856; + tmp897 = tmp853 + tmp856; + tmp858 = + (K980785280 * tmp848) + (K195090322 * tmp857); + tmp888 = + (K195090322 * tmp848) - (K980785280 * tmp857); + tmp898 = + (K555570233 * tmp896) + (K831469612 * tmp897); + tmp904 = + (K831469612 * tmp896) - (K555570233 * tmp897); + } + } + { + fftw_real tmp831; + fftw_real tmp886; + fftw_real tmp887; + fftw_real tmp890; + ASSERT_ALIGNED_DOUBLE; + tmp831 = tmp807 + tmp830; + tmp886 = tmp858 + tmp885; + c_re(inout[46 * iostride]) = tmp831 - tmp886; + c_re(inout[14 * iostride]) = tmp831 + tmp886; + tmp887 = tmp807 - tmp830; + tmp890 = tmp888 - tmp889; + c_re(inout[62 * iostride]) = tmp887 - tmp890; + c_re(inout[30 * iostride]) = tmp887 + tmp890; + } + { + fftw_real tmp1097; + fftw_real tmp1100; + fftw_real tmp1101; + fftw_real tmp1102; + ASSERT_ALIGNED_DOUBLE; + tmp1097 = tmp888 + tmp889; + tmp1100 = tmp1098 + tmp1099; + c_im(inout[14 * iostride]) = tmp1097 + tmp1100; + c_im(inout[46 * iostride]) = tmp1100 - tmp1097; + tmp1101 = tmp885 - tmp858; + tmp1102 = tmp1099 - tmp1098; + c_im(inout[30 * iostride]) = tmp1101 + tmp1102; + c_im(inout[62 * iostride]) = tmp1102 - tmp1101; + } + { + fftw_real tmp895; + fftw_real tmp902; + fftw_real tmp903; + fftw_real tmp906; + ASSERT_ALIGNED_DOUBLE; + tmp895 = tmp891 + tmp894; + tmp902 = tmp898 + tmp901; + c_re(inout[38 * iostride]) = tmp895 - tmp902; + c_re(inout[6 * iostride]) = tmp895 + tmp902; + tmp903 = tmp891 - tmp894; + tmp906 = tmp904 - tmp905; + c_re(inout[54 * iostride]) = tmp903 - tmp906; + c_re(inout[22 * iostride]) = tmp903 + tmp906; + } + { + fftw_real tmp1089; + fftw_real tmp1094; + fftw_real tmp1095; + fftw_real tmp1096; + ASSERT_ALIGNED_DOUBLE; + tmp1089 = tmp904 + tmp905; + tmp1094 = tmp1090 + tmp1093; + c_im(inout[6 * iostride]) = tmp1089 + tmp1094; + c_im(inout[38 * iostride]) = tmp1094 - tmp1089; + tmp1095 = tmp901 - 
tmp898; + tmp1096 = tmp1093 - tmp1090; + c_im(inout[22 * iostride]) = tmp1095 + tmp1096; + c_im(inout[54 * iostride]) = tmp1096 - tmp1095; + } + } + { + fftw_real tmp911; + fftw_real tmp939; + fftw_real tmp918; + fftw_real tmp1074; + fftw_real tmp1079; + fftw_real tmp1085; + fftw_real tmp942; + fftw_real tmp1084; + fftw_real tmp933; + fftw_real tmp937; + fftw_real tmp949; + fftw_real tmp953; + fftw_real tmp926; + fftw_real tmp936; + fftw_real tmp946; + fftw_real tmp952; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp907; + fftw_real tmp910; + fftw_real tmp940; + fftw_real tmp941; + ASSERT_ALIGNED_DOUBLE; + tmp907 = tmp791 + tmp794; + tmp910 = K707106781 * (tmp908 + tmp909); + tmp911 = tmp907 - tmp910; + tmp939 = tmp907 + tmp910; + { + fftw_real tmp914; + fftw_real tmp917; + fftw_real tmp1075; + fftw_real tmp1078; + ASSERT_ALIGNED_DOUBLE; + tmp914 = + (K923879532 * tmp912) - (K382683432 * tmp913); + tmp917 = + (K923879532 * tmp915) + (K382683432 * tmp916); + tmp918 = tmp914 - tmp917; + tmp1074 = tmp914 + tmp917; + tmp1075 = K707106781 * (tmp800 + tmp805); + tmp1078 = tmp1076 + tmp1077; + tmp1079 = tmp1075 + tmp1078; + tmp1085 = tmp1078 - tmp1075; + } + tmp940 = (K382683432 * tmp912) + (K923879532 * tmp913); + tmp941 = (K923879532 * tmp916) - (K382683432 * tmp915); + tmp942 = tmp940 + tmp941; + tmp1084 = tmp941 - tmp940; + { + fftw_real tmp929; + fftw_real tmp947; + fftw_real tmp932; + fftw_real tmp948; + fftw_real tmp928; + fftw_real tmp931; + ASSERT_ALIGNED_DOUBLE; + tmp928 = K707106781 * (tmp882 + tmp881); + tmp929 = tmp927 - tmp928; + tmp947 = tmp927 + tmp928; + tmp931 = K707106781 * (tmp868 + tmp873); + tmp932 = tmp930 - tmp931; + tmp948 = tmp930 + tmp931; + tmp933 = + (K555570233 * tmp929) - (K831469612 * tmp932); + tmp937 = + (K831469612 * tmp929) + (K555570233 * tmp932); + tmp949 = + (K980785280 * tmp947) - (K195090322 * tmp948); + tmp953 = + (K195090322 * tmp947) + (K980785280 * tmp948); + } + { + fftw_real tmp922; + fftw_real tmp944; + fftw_real tmp925; + 
fftw_real tmp945; + fftw_real tmp921; + fftw_real tmp924; + ASSERT_ALIGNED_DOUBLE; + tmp921 = K707106781 * (tmp846 + tmp841); + tmp922 = tmp920 - tmp921; + tmp944 = tmp920 + tmp921; + tmp924 = K707106781 * (tmp854 + tmp855); + tmp925 = tmp923 - tmp924; + tmp945 = tmp923 + tmp924; + tmp926 = + (K555570233 * tmp922) + (K831469612 * tmp925); + tmp936 = + (K555570233 * tmp925) - (K831469612 * tmp922); + tmp946 = + (K980785280 * tmp944) + (K195090322 * tmp945); + tmp952 = + (K980785280 * tmp945) - (K195090322 * tmp944); + } + } + { + fftw_real tmp919; + fftw_real tmp934; + fftw_real tmp935; + fftw_real tmp938; + ASSERT_ALIGNED_DOUBLE; + tmp919 = tmp911 + tmp918; + tmp934 = tmp926 + tmp933; + c_re(inout[42 * iostride]) = tmp919 - tmp934; + c_re(inout[10 * iostride]) = tmp919 + tmp934; + tmp935 = tmp911 - tmp918; + tmp938 = tmp936 - tmp937; + c_re(inout[58 * iostride]) = tmp935 - tmp938; + c_re(inout[26 * iostride]) = tmp935 + tmp938; + } + { + fftw_real tmp1083; + fftw_real tmp1086; + fftw_real tmp1087; + fftw_real tmp1088; + ASSERT_ALIGNED_DOUBLE; + tmp1083 = tmp936 + tmp937; + tmp1086 = tmp1084 + tmp1085; + c_im(inout[10 * iostride]) = tmp1083 + tmp1086; + c_im(inout[42 * iostride]) = tmp1086 - tmp1083; + tmp1087 = tmp933 - tmp926; + tmp1088 = tmp1085 - tmp1084; + c_im(inout[26 * iostride]) = tmp1087 + tmp1088; + c_im(inout[58 * iostride]) = tmp1088 - tmp1087; + } + { + fftw_real tmp943; + fftw_real tmp950; + fftw_real tmp951; + fftw_real tmp954; + ASSERT_ALIGNED_DOUBLE; + tmp943 = tmp939 + tmp942; + tmp950 = tmp946 + tmp949; + c_re(inout[34 * iostride]) = tmp943 - tmp950; + c_re(inout[2 * iostride]) = tmp943 + tmp950; + tmp951 = tmp939 - tmp942; + tmp954 = tmp952 - tmp953; + c_re(inout[50 * iostride]) = tmp951 - tmp954; + c_re(inout[18 * iostride]) = tmp951 + tmp954; + } + { + fftw_real tmp1073; + fftw_real tmp1080; + fftw_real tmp1081; + fftw_real tmp1082; + ASSERT_ALIGNED_DOUBLE; + tmp1073 = tmp952 + tmp953; + tmp1080 = tmp1074 + tmp1079; + c_im(inout[2 * iostride]) 
= tmp1073 + tmp1080; + c_im(inout[34 * iostride]) = tmp1080 - tmp1073; + tmp1081 = tmp949 - tmp946; + tmp1082 = tmp1079 - tmp1074; + c_im(inout[18 * iostride]) = tmp1081 + tmp1082; + c_im(inout[50 * iostride]) = tmp1082 - tmp1081; + } + } + { + fftw_real tmp667; + fftw_real tmp727; + fftw_real tmp1125; + fftw_real tmp1131; + fftw_real tmp682; + fftw_real tmp1122; + fftw_real tmp730; + fftw_real tmp1130; + fftw_real tmp702; + fftw_real tmp724; + fftw_real tmp734; + fftw_real tmp740; + fftw_real tmp721; + fftw_real tmp725; + fftw_real tmp737; + fftw_real tmp741; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp659; + fftw_real tmp666; + fftw_real tmp1123; + fftw_real tmp1124; + ASSERT_ALIGNED_DOUBLE; + tmp659 = tmp655 - tmp658; + tmp666 = tmp662 - tmp665; + tmp667 = tmp659 - tmp666; + tmp727 = tmp659 + tmp666; + tmp1123 = tmp745 - tmp744; + tmp1124 = tmp1109 - tmp1106; + tmp1125 = tmp1123 + tmp1124; + tmp1131 = tmp1124 - tmp1123; + } + { + fftw_real tmp674; + fftw_real tmp728; + fftw_real tmp681; + fftw_real tmp729; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp670; + fftw_real tmp673; + fftw_real tmp677; + fftw_real tmp680; + ASSERT_ALIGNED_DOUBLE; + tmp670 = tmp668 - tmp669; + tmp673 = tmp671 - tmp672; + tmp674 = + (K555570233 * tmp670) - (K831469612 * tmp673); + tmp728 = + (K555570233 * tmp673) + (K831469612 * tmp670); + tmp677 = tmp675 - tmp676; + tmp680 = tmp678 - tmp679; + tmp681 = + (K831469612 * tmp677) + (K555570233 * tmp680); + tmp729 = + (K555570233 * tmp677) - (K831469612 * tmp680); + } + tmp682 = tmp674 - tmp681; + tmp1122 = tmp674 + tmp681; + tmp730 = tmp728 + tmp729; + tmp1130 = tmp729 - tmp728; + } + { + fftw_real tmp694; + fftw_real tmp732; + fftw_real tmp701; + fftw_real tmp733; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp686; + fftw_real tmp693; + fftw_real tmp697; + fftw_real tmp700; + ASSERT_ALIGNED_DOUBLE; + tmp686 = tmp684 - tmp685; + tmp693 = tmp689 - tmp692; + tmp694 = tmp686 - tmp693; + tmp732 = tmp686 + tmp693; + tmp697 = tmp695 - tmp696; + tmp700 
= tmp698 - tmp699; + tmp701 = tmp697 - tmp700; + tmp733 = tmp697 + tmp700; + } + tmp702 = (K956940335 * tmp694) + (K290284677 * tmp701); + tmp724 = (K290284677 * tmp694) - (K956940335 * tmp701); + tmp734 = (K471396736 * tmp732) + (K881921264 * tmp733); + tmp740 = (K881921264 * tmp732) - (K471396736 * tmp733); + } + { + fftw_real tmp713; + fftw_real tmp735; + fftw_real tmp720; + fftw_real tmp736; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp705; + fftw_real tmp712; + fftw_real tmp716; + fftw_real tmp719; + ASSERT_ALIGNED_DOUBLE; + tmp705 = tmp703 - tmp704; + tmp712 = tmp708 - tmp711; + tmp713 = tmp705 - tmp712; + tmp735 = tmp705 + tmp712; + tmp716 = tmp714 - tmp715; + tmp719 = tmp717 - tmp718; + tmp720 = tmp716 - tmp719; + tmp736 = tmp716 + tmp719; + } + tmp721 = (K290284677 * tmp713) - (K956940335 * tmp720); + tmp725 = (K290284677 * tmp720) + (K956940335 * tmp713); + tmp737 = (K881921264 * tmp735) - (K471396736 * tmp736); + tmp741 = (K881921264 * tmp736) + (K471396736 * tmp735); + } + { + fftw_real tmp683; + fftw_real tmp722; + fftw_real tmp723; + fftw_real tmp726; + ASSERT_ALIGNED_DOUBLE; + tmp683 = tmp667 + tmp682; + tmp722 = tmp702 + tmp721; + c_re(inout[45 * iostride]) = tmp683 - tmp722; + c_re(inout[13 * iostride]) = tmp683 + tmp722; + tmp723 = tmp667 - tmp682; + tmp726 = tmp724 - tmp725; + c_re(inout[61 * iostride]) = tmp723 - tmp726; + c_re(inout[29 * iostride]) = tmp723 + tmp726; + } + { + fftw_real tmp1129; + fftw_real tmp1132; + fftw_real tmp1133; + fftw_real tmp1134; + ASSERT_ALIGNED_DOUBLE; + tmp1129 = tmp724 + tmp725; + tmp1132 = tmp1130 + tmp1131; + c_im(inout[13 * iostride]) = tmp1129 + tmp1132; + c_im(inout[45 * iostride]) = tmp1132 - tmp1129; + tmp1133 = tmp721 - tmp702; + tmp1134 = tmp1131 - tmp1130; + c_im(inout[29 * iostride]) = tmp1133 + tmp1134; + c_im(inout[61 * iostride]) = tmp1134 - tmp1133; + } + { + fftw_real tmp731; + fftw_real tmp738; + fftw_real tmp739; + fftw_real tmp742; + ASSERT_ALIGNED_DOUBLE; + tmp731 = tmp727 + tmp730; + tmp738 = 
tmp734 + tmp737; + c_re(inout[37 * iostride]) = tmp731 - tmp738; + c_re(inout[5 * iostride]) = tmp731 + tmp738; + tmp739 = tmp727 - tmp730; + tmp742 = tmp740 - tmp741; + c_re(inout[53 * iostride]) = tmp739 - tmp742; + c_re(inout[21 * iostride]) = tmp739 + tmp742; + } + { + fftw_real tmp1121; + fftw_real tmp1126; + fftw_real tmp1127; + fftw_real tmp1128; + ASSERT_ALIGNED_DOUBLE; + tmp1121 = tmp740 + tmp741; + tmp1126 = tmp1122 + tmp1125; + c_im(inout[5 * iostride]) = tmp1121 + tmp1126; + c_im(inout[37 * iostride]) = tmp1126 - tmp1121; + tmp1127 = tmp737 - tmp734; + tmp1128 = tmp1125 - tmp1122; + c_im(inout[21 * iostride]) = tmp1127 + tmp1128; + c_im(inout[53 * iostride]) = tmp1128 - tmp1127; + } + } + { + fftw_real tmp747; + fftw_real tmp775; + fftw_real tmp1111; + fftw_real tmp1117; + fftw_real tmp754; + fftw_real tmp1104; + fftw_real tmp778; + fftw_real tmp1116; + fftw_real tmp762; + fftw_real tmp772; + fftw_real tmp782; + fftw_real tmp788; + fftw_real tmp769; + fftw_real tmp773; + fftw_real tmp785; + fftw_real tmp789; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp743; + fftw_real tmp746; + fftw_real tmp1105; + fftw_real tmp1110; + ASSERT_ALIGNED_DOUBLE; + tmp743 = tmp655 + tmp658; + tmp746 = tmp744 + tmp745; + tmp747 = tmp743 - tmp746; + tmp775 = tmp743 + tmp746; + tmp1105 = tmp662 + tmp665; + tmp1110 = tmp1106 + tmp1109; + tmp1111 = tmp1105 + tmp1110; + tmp1117 = tmp1110 - tmp1105; + } + { + fftw_real tmp750; + fftw_real tmp776; + fftw_real tmp753; + fftw_real tmp777; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp748; + fftw_real tmp749; + fftw_real tmp751; + fftw_real tmp752; + ASSERT_ALIGNED_DOUBLE; + tmp748 = tmp668 + tmp669; + tmp749 = tmp671 + tmp672; + tmp750 = + (K980785280 * tmp748) - (K195090322 * tmp749); + tmp776 = + (K980785280 * tmp749) + (K195090322 * tmp748); + tmp751 = tmp675 + tmp676; + tmp752 = tmp678 + tmp679; + tmp753 = + (K195090322 * tmp751) + (K980785280 * tmp752); + tmp777 = + (K980785280 * tmp751) - (K195090322 * tmp752); + } + tmp754 = 
tmp750 - tmp753; + tmp1104 = tmp750 + tmp753; + tmp778 = tmp776 + tmp777; + tmp1116 = tmp777 - tmp776; + } + { + fftw_real tmp758; + fftw_real tmp780; + fftw_real tmp761; + fftw_real tmp781; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp756; + fftw_real tmp757; + fftw_real tmp759; + fftw_real tmp760; + ASSERT_ALIGNED_DOUBLE; + tmp756 = tmp695 + tmp696; + tmp757 = tmp692 + tmp689; + tmp758 = tmp756 - tmp757; + tmp780 = tmp756 + tmp757; + tmp759 = tmp684 + tmp685; + tmp760 = tmp698 + tmp699; + tmp761 = tmp759 - tmp760; + tmp781 = tmp759 + tmp760; + } + tmp762 = (K634393284 * tmp758) + (K773010453 * tmp761); + tmp772 = (K634393284 * tmp761) - (K773010453 * tmp758); + tmp782 = (K995184726 * tmp780) + (K098017140 * tmp781); + tmp788 = (K995184726 * tmp781) - (K098017140 * tmp780); + } + { + fftw_real tmp765; + fftw_real tmp783; + fftw_real tmp768; + fftw_real tmp784; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp763; + fftw_real tmp764; + fftw_real tmp766; + fftw_real tmp767; + ASSERT_ALIGNED_DOUBLE; + tmp763 = tmp703 + tmp704; + tmp764 = tmp718 + tmp717; + tmp765 = tmp763 - tmp764; + tmp783 = tmp763 + tmp764; + tmp766 = tmp714 + tmp715; + tmp767 = tmp708 + tmp711; + tmp768 = tmp766 - tmp767; + tmp784 = tmp766 + tmp767; + } + tmp769 = (K634393284 * tmp765) - (K773010453 * tmp768); + tmp773 = (K773010453 * tmp765) + (K634393284 * tmp768); + tmp785 = (K995184726 * tmp783) - (K098017140 * tmp784); + tmp789 = (K098017140 * tmp783) + (K995184726 * tmp784); + } + { + fftw_real tmp755; + fftw_real tmp770; + fftw_real tmp771; + fftw_real tmp774; + ASSERT_ALIGNED_DOUBLE; + tmp755 = tmp747 + tmp754; + tmp770 = tmp762 + tmp769; + c_re(inout[41 * iostride]) = tmp755 - tmp770; + c_re(inout[9 * iostride]) = tmp755 + tmp770; + tmp771 = tmp747 - tmp754; + tmp774 = tmp772 - tmp773; + c_re(inout[57 * iostride]) = tmp771 - tmp774; + c_re(inout[25 * iostride]) = tmp771 + tmp774; + } + { + fftw_real tmp1115; + fftw_real tmp1118; + fftw_real tmp1119; + fftw_real tmp1120; + ASSERT_ALIGNED_DOUBLE; 
+ tmp1115 = tmp772 + tmp773; + tmp1118 = tmp1116 + tmp1117; + c_im(inout[9 * iostride]) = tmp1115 + tmp1118; + c_im(inout[41 * iostride]) = tmp1118 - tmp1115; + tmp1119 = tmp769 - tmp762; + tmp1120 = tmp1117 - tmp1116; + c_im(inout[25 * iostride]) = tmp1119 + tmp1120; + c_im(inout[57 * iostride]) = tmp1120 - tmp1119; + } + { + fftw_real tmp779; + fftw_real tmp786; + fftw_real tmp787; + fftw_real tmp790; + ASSERT_ALIGNED_DOUBLE; + tmp779 = tmp775 + tmp778; + tmp786 = tmp782 + tmp785; + c_re(inout[33 * iostride]) = tmp779 - tmp786; + c_re(inout[iostride]) = tmp779 + tmp786; + tmp787 = tmp775 - tmp778; + tmp790 = tmp788 - tmp789; + c_re(inout[49 * iostride]) = tmp787 - tmp790; + c_re(inout[17 * iostride]) = tmp787 + tmp790; + } + { + fftw_real tmp1103; + fftw_real tmp1112; + fftw_real tmp1113; + fftw_real tmp1114; + ASSERT_ALIGNED_DOUBLE; + tmp1103 = tmp788 + tmp789; + tmp1112 = tmp1104 + tmp1111; + c_im(inout[iostride]) = tmp1103 + tmp1112; + c_im(inout[33 * iostride]) = tmp1112 - tmp1103; + tmp1113 = tmp785 - tmp782; + tmp1114 = tmp1111 - tmp1104; + c_im(inout[17 * iostride]) = tmp1113 + tmp1114; + c_im(inout[49 * iostride]) = tmp1114 - tmp1113; + } + } + } +} + +static const int twiddle_order[] = + { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, +20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, +44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63 }; +fftw_codelet_desc fftw_twiddle_64_desc = { + "fftw_twiddle_64", + (void (*)()) fftw_twiddle_64, + 64, + FFTW_FORWARD, + FFTW_TWIDDLE, + 1408, + 63, + twiddle_order, +}; diff --git a/src/fftw/ftw_7.c b/src/fftw/ftw_7.c new file mode 100644 index 0000000..390987d --- /dev/null +++ b/src/fftw/ftw_7.c @@ -0,0 +1,272 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as 
published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Mon Mar 24 02:07:33 EST 2003 */ + +#include "fftw-int.h" +#include "fftw.h" + +/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -twiddle 7 */ + +/* + * This function contains 72 FP additions, 60 FP multiplications, + * (or, 60 additions, 48 multiplications, 12 fused multiply/add), + * 24 stack variables, and 28 memory accesses + */ +static const fftw_real K222520933 = +FFTW_KONST(+0.222520933956314404288902564496794759466355569); +static const fftw_real K900968867 = +FFTW_KONST(+0.900968867902419126236102319507445051165919162); +static const fftw_real K623489801 = +FFTW_KONST(+0.623489801858733530525004884004239810632274731); +static const fftw_real K433883739 = +FFTW_KONST(+0.433883739117558120475768332848358754609990728); +static const fftw_real K974927912 = +FFTW_KONST(+0.974927912181823607018131682993931217232785801); +static const fftw_real K781831482 = +FFTW_KONST(+0.781831482468029808708444526674057750232334519); + +/* + * Generator Id's : + * $Id: ftw_7.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: ftw_7.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: ftw_7.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + */ + +/* + * fftw_twiddle_7 -- in-place twiddle codelet over 7 complex points. + * + * A: base of the data; read and overwritten at A[k * iostride], k = 0..6. + * W: twiddle factors; W[0..5] are read in each iteration. + * iostride: element distance between the 7 points of one transform. + * m: number of iterations of the outer loop. + * dist: amount added to the data pointer after each iteration. + * + * Each iteration uses point 0 as is, multiplies point k (k = 1..6) by + * W[k - 1], combines the seven values and stores the seven results back + * into the same locations; W then advances by 6 entries. + * The statement order is generator output; keep it unchanged. + */ +void fftw_twiddle_7(fftw_complex *A, const fftw_complex *W, int iostride, + int m, int dist) +{ + int 
i; + fftw_complex *inout; + inout = A; + for (i = m; i > 0; i = i - 1, inout = inout + dist, W = W + 6) { + fftw_real tmp1; + fftw_real tmp53; + fftw_real tmp12; + fftw_real tmp54; + fftw_real tmp38; + fftw_real tmp50; + fftw_real tmp23; + fftw_real tmp55; + fftw_real tmp44; + fftw_real tmp51; + fftw_real tmp34; + fftw_real tmp56; + fftw_real tmp41; + fftw_real tmp52; + ASSERT_ALIGNED_DOUBLE; + /* point 0 enters the transform without a twiddle multiply */ + tmp1 = c_re(inout[0]); + tmp53 = c_im(inout[0]); + { + fftw_real tmp6; + fftw_real tmp36; + fftw_real tmp11; + fftw_real tmp37; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp3; + fftw_real tmp5; + fftw_real tmp2; + fftw_real tmp4; + ASSERT_ALIGNED_DOUBLE; + tmp3 = c_re(inout[iostride]); + tmp5 = c_im(inout[iostride]); + tmp2 = c_re(W[0]); + tmp4 = c_im(W[0]); + /* complex product inout[iostride] * W[0]: tmp6 is its real part, tmp36 its imaginary part */ + tmp6 = (tmp2 * tmp3) - (tmp4 * tmp5); + tmp36 = (tmp4 * tmp3) + (tmp2 * tmp5); + } + { + fftw_real tmp8; + fftw_real tmp10; + fftw_real tmp7; + fftw_real tmp9; + ASSERT_ALIGNED_DOUBLE; + tmp8 = c_re(inout[6 * iostride]); + tmp10 = c_im(inout[6 * iostride]); + tmp7 = c_re(W[5]); + tmp9 = c_im(W[5]); + tmp11 = (tmp7 * tmp8) - (tmp9 * tmp10); + tmp37 = (tmp9 * tmp8) + (tmp7 * tmp10); + } + tmp12 = tmp6 + tmp11; + tmp54 = tmp11 - tmp6; + tmp38 = tmp36 - tmp37; + tmp50 = tmp36 + tmp37; + } + { + fftw_real tmp17; + fftw_real tmp42; + fftw_real tmp22; + fftw_real tmp43; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp14; + fftw_real tmp16; + fftw_real tmp13; + fftw_real tmp15; + ASSERT_ALIGNED_DOUBLE; + tmp14 = c_re(inout[2 * iostride]); + tmp16 = c_im(inout[2 * iostride]); + tmp13 = c_re(W[1]); + tmp15 = c_im(W[1]); + tmp17 = (tmp13 * tmp14) - (tmp15 * tmp16); + tmp42 = (tmp15 * tmp14) + (tmp13 * tmp16); + } + { + fftw_real tmp19; + fftw_real tmp21; + fftw_real tmp18; + fftw_real tmp20; + ASSERT_ALIGNED_DOUBLE; + tmp19 = c_re(inout[5 * iostride]); + tmp21 = c_im(inout[5 * iostride]); + tmp18 = c_re(W[4]); + tmp20 = c_im(W[4]); + tmp22 = (tmp18 * tmp19) - (tmp20 * tmp21); + tmp43 = (tmp20 * tmp19) + (tmp18 * tmp21); + } + tmp23 = 
tmp17 + tmp22; + tmp55 = tmp22 - tmp17; + tmp44 = tmp42 - tmp43; + tmp51 = tmp42 + tmp43; + } + { + fftw_real tmp28; + fftw_real tmp39; + fftw_real tmp33; + fftw_real tmp40; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp25; + fftw_real tmp27; + fftw_real tmp24; + fftw_real tmp26; + ASSERT_ALIGNED_DOUBLE; + tmp25 = c_re(inout[3 * iostride]); + tmp27 = c_im(inout[3 * iostride]); + tmp24 = c_re(W[2]); + tmp26 = c_im(W[2]); + tmp28 = (tmp24 * tmp25) - (tmp26 * tmp27); + tmp39 = (tmp26 * tmp25) + (tmp24 * tmp27); + } + { + fftw_real tmp30; + fftw_real tmp32; + fftw_real tmp29; + fftw_real tmp31; + ASSERT_ALIGNED_DOUBLE; + tmp30 = c_re(inout[4 * iostride]); + tmp32 = c_im(inout[4 * iostride]); + tmp29 = c_re(W[3]); + tmp31 = c_im(W[3]); + tmp33 = (tmp29 * tmp30) - (tmp31 * tmp32); + tmp40 = (tmp31 * tmp30) + (tmp29 * tmp32); + } + tmp34 = tmp28 + tmp33; + tmp56 = tmp33 - tmp28; + tmp41 = tmp39 - tmp40; + tmp52 = tmp39 + tmp40; + } + { + fftw_real tmp47; + fftw_real tmp46; + fftw_real tmp59; + fftw_real tmp60; + ASSERT_ALIGNED_DOUBLE; + /* real part of output 0: sum of the real parts of all seven (twiddled) inputs */ + c_re(inout[0]) = tmp1 + tmp12 + tmp23 + tmp34; + tmp47 = + (K781831482 * tmp38) + (K974927912 * tmp44) + + (K433883739 * tmp41); + tmp46 = + tmp1 + (K623489801 * tmp12) - (K900968867 * tmp34) - + (K222520933 * tmp23); + c_re(inout[6 * iostride]) = tmp46 - tmp47; + c_re(inout[iostride]) = tmp46 + tmp47; + { + fftw_real tmp49; + fftw_real tmp48; + fftw_real tmp45; + fftw_real tmp35; + ASSERT_ALIGNED_DOUBLE; + tmp49 = + (K433883739 * tmp38) + (K974927912 * tmp41) - + (K781831482 * tmp44); + tmp48 = + tmp1 + (K623489801 * tmp23) - + (K222520933 * tmp34) - (K900968867 * tmp12); + c_re(inout[4 * iostride]) = tmp48 - tmp49; + c_re(inout[3 * iostride]) = tmp48 + tmp49; + tmp45 = + (K974927912 * tmp38) - (K781831482 * tmp41) - + (K433883739 * tmp44); + tmp35 = + tmp1 + (K623489801 * tmp34) - + (K900968867 * tmp23) - (K222520933 * tmp12); + c_re(inout[5 * iostride]) = tmp35 - tmp45; + c_re(inout[2 * iostride]) = tmp35 + tmp45; + } + c_im(inout[0]) = 
tmp50 + tmp51 + tmp52 + tmp53; + tmp59 = + (K974927912 * tmp54) - (K781831482 * tmp56) - + (K433883739 * tmp55); + tmp60 = + (K623489801 * tmp52) + tmp53 - (K900968867 * tmp51) - + (K222520933 * tmp50); + c_im(inout[2 * iostride]) = tmp59 + tmp60; + c_im(inout[5 * iostride]) = tmp60 - tmp59; + { + fftw_real tmp61; + fftw_real tmp62; + fftw_real tmp57; + fftw_real tmp58; + ASSERT_ALIGNED_DOUBLE; + tmp61 = + (K433883739 * tmp54) + (K974927912 * tmp56) - + (K781831482 * tmp55); + tmp62 = + (K623489801 * tmp51) + tmp53 - + (K222520933 * tmp52) - (K900968867 * tmp50); + c_im(inout[3 * iostride]) = tmp61 + tmp62; + c_im(inout[4 * iostride]) = tmp62 - tmp61; + tmp57 = + (K781831482 * tmp54) + (K974927912 * tmp55) + + (K433883739 * tmp56); + tmp58 = + (K623489801 * tmp50) + tmp53 - + (K900968867 * tmp52) - (K222520933 * tmp51); + c_im(inout[iostride]) = tmp57 + tmp58; + c_im(inout[6 * iostride]) = tmp58 - tmp57; + } + } + } +} + +static const int twiddle_order[] = { 1, 2, 3, 4, 5, 6 }; +/* Descriptor record naming this codelet; config.c takes its address through TWIDDLE_CODELET(7). */ +fftw_codelet_desc fftw_twiddle_7_desc = { + "fftw_twiddle_7", + (void (*)()) fftw_twiddle_7, + 7, + FFTW_FORWARD, + FFTW_TWIDDLE, + 154, + 6, + twiddle_order, +}; diff --git a/src/fftw/ftw_8.c b/src/fftw/ftw_8.c new file mode 100644 index 0000000..a155bdd --- /dev/null +++ b/src/fftw/ftw_8.c @@ -0,0 +1,285 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Mon Mar 24 02:07:38 EST 2003 */ + +#include "fftw-int.h" +#include "fftw.h" + +/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -twiddle 8 */ + +/* + * This function contains 66 FP additions, 32 FP multiplications, + * (or, 52 additions, 18 multiplications, 14 fused multiply/add), + * 28 stack variables, and 32 memory accesses + */ +/* numerically 1/sqrt(2) */ +static const fftw_real K707106781 = +FFTW_KONST(+0.707106781186547524400844362104849039284835938); + +/* + * Generator Id's : + * $Id: ftw_8.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: ftw_8.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: ftw_8.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + */ + +/* + * fftw_twiddle_8 -- in-place twiddle codelet over 8 complex points. + * + * A: base of the data; read and overwritten at A[k * iostride], k = 0..7. + * W: twiddle factors; W[0..6] are read in each iteration. + * iostride: element distance between the 8 points of one transform. + * m: number of iterations of the outer loop. + * dist: amount added to the data pointer after each iteration. + * + * Each iteration uses point 0 as is, multiplies point k (k = 1..7) by + * W[k - 1], combines the eight values and stores the eight results back + * into the same locations; W then advances by 7 entries. + * The statement order is generator output; keep it unchanged. + */ +void fftw_twiddle_8(fftw_complex *A, const fftw_complex *W, int iostride, + int m, int dist) +{ + int i; + fftw_complex *inout; + inout = A; + for (i = m; i > 0; i = i - 1, inout = inout + dist, W = W + 7) { + fftw_real tmp7; + fftw_real tmp43; + fftw_real tmp71; + fftw_real tmp76; + fftw_real tmp41; + fftw_real tmp53; + fftw_real tmp56; + fftw_real tmp65; + fftw_real tmp18; + fftw_real tmp77; + fftw_real tmp46; + fftw_real tmp68; + fftw_real tmp30; + fftw_real tmp48; + fftw_real tmp51; + fftw_real tmp64; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp1; + fftw_real tmp70; + fftw_real tmp6; + fftw_real tmp69; + ASSERT_ALIGNED_DOUBLE; + /* point 0 is read as is; every other point is first multiplied by its W entry */ + tmp1 = c_re(inout[0]); + tmp70 = c_im(inout[0]); + { + fftw_real tmp3; + fftw_real tmp5; + fftw_real tmp2; + fftw_real tmp4; + ASSERT_ALIGNED_DOUBLE; + tmp3 = c_re(inout[4 * iostride]); + tmp5 = c_im(inout[4 * iostride]); + tmp2 = c_re(W[3]); + tmp4 = c_im(W[3]); + tmp6 = (tmp2 * tmp3) - (tmp4 * tmp5); + tmp69 = (tmp4 * tmp3) + (tmp2 
* tmp5); + } + tmp7 = tmp1 + tmp6; + tmp43 = tmp1 - tmp6; + tmp71 = tmp69 + tmp70; + tmp76 = tmp70 - tmp69; + } + { + fftw_real tmp35; + fftw_real tmp54; + fftw_real tmp40; + fftw_real tmp55; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp32; + fftw_real tmp34; + fftw_real tmp31; + fftw_real tmp33; + ASSERT_ALIGNED_DOUBLE; + tmp32 = c_re(inout[7 * iostride]); + tmp34 = c_im(inout[7 * iostride]); + tmp31 = c_re(W[6]); + tmp33 = c_im(W[6]); + tmp35 = (tmp31 * tmp32) - (tmp33 * tmp34); + tmp54 = (tmp33 * tmp32) + (tmp31 * tmp34); + } + { + fftw_real tmp37; + fftw_real tmp39; + fftw_real tmp36; + fftw_real tmp38; + ASSERT_ALIGNED_DOUBLE; + tmp37 = c_re(inout[3 * iostride]); + tmp39 = c_im(inout[3 * iostride]); + tmp36 = c_re(W[2]); + tmp38 = c_im(W[2]); + tmp40 = (tmp36 * tmp37) - (tmp38 * tmp39); + tmp55 = (tmp38 * tmp37) + (tmp36 * tmp39); + } + tmp41 = tmp35 + tmp40; + tmp53 = tmp35 - tmp40; + tmp56 = tmp54 - tmp55; + tmp65 = tmp54 + tmp55; + } + { + fftw_real tmp12; + fftw_real tmp44; + fftw_real tmp17; + fftw_real tmp45; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp9; + fftw_real tmp11; + fftw_real tmp8; + fftw_real tmp10; + ASSERT_ALIGNED_DOUBLE; + tmp9 = c_re(inout[2 * iostride]); + tmp11 = c_im(inout[2 * iostride]); + tmp8 = c_re(W[1]); + tmp10 = c_im(W[1]); + tmp12 = (tmp8 * tmp9) - (tmp10 * tmp11); + tmp44 = (tmp10 * tmp9) + (tmp8 * tmp11); + } + { + fftw_real tmp14; + fftw_real tmp16; + fftw_real tmp13; + fftw_real tmp15; + ASSERT_ALIGNED_DOUBLE; + tmp14 = c_re(inout[6 * iostride]); + tmp16 = c_im(inout[6 * iostride]); + tmp13 = c_re(W[5]); + tmp15 = c_im(W[5]); + tmp17 = (tmp13 * tmp14) - (tmp15 * tmp16); + tmp45 = (tmp15 * tmp14) + (tmp13 * tmp16); + } + tmp18 = tmp12 + tmp17; + tmp77 = tmp12 - tmp17; + tmp46 = tmp44 - tmp45; + tmp68 = tmp44 + tmp45; + } + { + fftw_real tmp24; + fftw_real tmp49; + fftw_real tmp29; + fftw_real tmp50; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp21; + fftw_real tmp23; + fftw_real tmp20; + fftw_real tmp22; + ASSERT_ALIGNED_DOUBLE; 
+ tmp21 = c_re(inout[iostride]); + tmp23 = c_im(inout[iostride]); + tmp20 = c_re(W[0]); + tmp22 = c_im(W[0]); + tmp24 = (tmp20 * tmp21) - (tmp22 * tmp23); + tmp49 = (tmp22 * tmp21) + (tmp20 * tmp23); + } + { + fftw_real tmp26; + fftw_real tmp28; + fftw_real tmp25; + fftw_real tmp27; + ASSERT_ALIGNED_DOUBLE; + tmp26 = c_re(inout[5 * iostride]); + tmp28 = c_im(inout[5 * iostride]); + tmp25 = c_re(W[4]); + tmp27 = c_im(W[4]); + tmp29 = (tmp25 * tmp26) - (tmp27 * tmp28); + tmp50 = (tmp27 * tmp26) + (tmp25 * tmp28); + } + tmp30 = tmp24 + tmp29; + tmp48 = tmp24 - tmp29; + tmp51 = tmp49 - tmp50; + tmp64 = tmp49 + tmp50; + } + { + fftw_real tmp19; + fftw_real tmp42; + fftw_real tmp63; + fftw_real tmp66; + ASSERT_ALIGNED_DOUBLE; + tmp19 = tmp7 + tmp18; + tmp42 = tmp30 + tmp41; + c_re(inout[4 * iostride]) = tmp19 - tmp42; + c_re(inout[0]) = tmp19 + tmp42; + { + fftw_real tmp73; + fftw_real tmp74; + fftw_real tmp67; + fftw_real tmp72; + ASSERT_ALIGNED_DOUBLE; + tmp73 = tmp41 - tmp30; + tmp74 = tmp71 - tmp68; + c_im(inout[2 * iostride]) = tmp73 + tmp74; + c_im(inout[6 * iostride]) = tmp74 - tmp73; + tmp67 = tmp64 + tmp65; + tmp72 = tmp68 + tmp71; + c_im(inout[0]) = tmp67 + tmp72; + c_im(inout[4 * iostride]) = tmp72 - tmp67; + } + tmp63 = tmp7 - tmp18; + tmp66 = tmp64 - tmp65; + c_re(inout[6 * iostride]) = tmp63 - tmp66; + c_re(inout[2 * iostride]) = tmp63 + tmp66; + { + fftw_real tmp59; + fftw_real tmp78; + fftw_real tmp62; + fftw_real tmp75; + fftw_real tmp60; + fftw_real tmp61; + ASSERT_ALIGNED_DOUBLE; + tmp59 = tmp43 - tmp46; + tmp78 = tmp76 - tmp77; + tmp60 = tmp51 - tmp48; + tmp61 = tmp53 + tmp56; + tmp62 = K707106781 * (tmp60 - tmp61); + tmp75 = K707106781 * (tmp60 + tmp61); + c_re(inout[7 * iostride]) = tmp59 - tmp62; + c_re(inout[3 * iostride]) = tmp59 + tmp62; + c_im(inout[iostride]) = tmp75 + tmp78; + c_im(inout[5 * iostride]) = tmp78 - tmp75; + } + { + fftw_real tmp47; + fftw_real tmp80; + fftw_real tmp58; + fftw_real tmp79; + fftw_real tmp52; + fftw_real tmp57; + 
ASSERT_ALIGNED_DOUBLE; + tmp47 = tmp43 + tmp46; + tmp80 = tmp77 + tmp76; + tmp52 = tmp48 + tmp51; + tmp57 = tmp53 - tmp56; + tmp58 = K707106781 * (tmp52 + tmp57); + tmp79 = K707106781 * (tmp57 - tmp52); + c_re(inout[5 * iostride]) = tmp47 - tmp58; + c_re(inout[iostride]) = tmp47 + tmp58; + c_im(inout[3 * iostride]) = tmp79 + tmp80; + c_im(inout[7 * iostride]) = tmp80 - tmp79; + } + } + } +} + +static const int twiddle_order[] = { 1, 2, 3, 4, 5, 6, 7 }; +/* Descriptor record naming this codelet; config.c takes its address through TWIDDLE_CODELET(8). */ +fftw_codelet_desc fftw_twiddle_8_desc = { + "fftw_twiddle_8", + (void (*)()) fftw_twiddle_8, + 8, + FFTW_FORWARD, + FFTW_TWIDDLE, + 176, + 7, + twiddle_order, +}; diff --git a/src/fftw/ftw_9.c b/src/fftw/ftw_9.c new file mode 100644 index 0000000..3990a3c --- /dev/null +++ b/src/fftw/ftw_9.c @@ -0,0 +1,377 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Mon Mar 24 02:07:39 EST 2003 */ + +#include "fftw-int.h" +#include "fftw.h" + +/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -twiddle 9 */ + +/* + * This function contains 96 FP additions, 72 FP multiplications, + * (or, 60 additions, 36 multiplications, 36 fused multiply/add), + * 34 stack variables, and 36 memory accesses + */ +static const fftw_real K939692620 = +FFTW_KONST(+0.939692620785908384054109277324731469936208134); +static const fftw_real K342020143 = +FFTW_KONST(+0.342020143325668733044099614682259580763083368); +static const fftw_real K984807753 = +FFTW_KONST(+0.984807753012208059366743024589523013670643252); +static const fftw_real K173648177 = +FFTW_KONST(+0.173648177666930348851716626769314796000375677); +static const fftw_real K642787609 = +FFTW_KONST(+0.642787609686539326322643409907263432907559884); +static const fftw_real K766044443 = +FFTW_KONST(+0.766044443118978035202392650555416673935832457); +static const fftw_real K500000000 = +FFTW_KONST(+0.500000000000000000000000000000000000000000000); +static const fftw_real K866025403 = +FFTW_KONST(+0.866025403784438646763723170752936183471402627); + +/* + * Generator Id's : + * $Id: ftw_9.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: ftw_9.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: ftw_9.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + */ + +/* + * fftw_twiddle_9 -- in-place twiddle codelet over 9 complex points. + * + * A: base of the data; read and overwritten at A[k * iostride], k = 0..8. + * W: twiddle factors; W[0..7] are read in each iteration. + * iostride: element distance between the 9 points of one transform. + * m: number of iterations of the outer loop. + * dist: amount added to the data pointer after each iteration. + * + * Each iteration uses point 0 as is, multiplies point k (k = 1..8) by + * W[k - 1], combines the nine values and stores the nine results back + * into the same locations; W then advances by 8 entries. + * The statement order is generator output; keep it unchanged. + */ +void fftw_twiddle_9(fftw_complex *A, const fftw_complex *W, int iostride, + int m, int dist) +{ + int i; + fftw_complex *inout; + inout = A; + for (i = m; i > 0; i = i - 1, inout = inout + dist, W = W + 8) { + fftw_real tmp1; + fftw_real tmp99; + fftw_real tmp52; + fftw_real 
tmp98; + fftw_real tmp105; + fftw_real tmp104; + fftw_real tmp12; + fftw_real tmp49; + fftw_real tmp47; + fftw_real tmp69; + fftw_real tmp86; + fftw_real tmp95; + fftw_real tmp74; + fftw_real tmp85; + fftw_real tmp30; + fftw_real tmp58; + fftw_real tmp82; + fftw_real tmp94; + fftw_real tmp63; + fftw_real tmp83; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp6; + fftw_real tmp50; + fftw_real tmp11; + fftw_real tmp51; + ASSERT_ALIGNED_DOUBLE; + /* point 0 is read as is; every other point is first multiplied by its W entry */ + tmp1 = c_re(inout[0]); + tmp99 = c_im(inout[0]); + { + fftw_real tmp3; + fftw_real tmp5; + fftw_real tmp2; + fftw_real tmp4; + ASSERT_ALIGNED_DOUBLE; + tmp3 = c_re(inout[3 * iostride]); + tmp5 = c_im(inout[3 * iostride]); + tmp2 = c_re(W[2]); + tmp4 = c_im(W[2]); + tmp6 = (tmp2 * tmp3) - (tmp4 * tmp5); + tmp50 = (tmp4 * tmp3) + (tmp2 * tmp5); + } + { + fftw_real tmp8; + fftw_real tmp10; + fftw_real tmp7; + fftw_real tmp9; + ASSERT_ALIGNED_DOUBLE; + tmp8 = c_re(inout[6 * iostride]); + tmp10 = c_im(inout[6 * iostride]); + tmp7 = c_re(W[5]); + tmp9 = c_im(W[5]); + tmp11 = (tmp7 * tmp8) - (tmp9 * tmp10); + tmp51 = (tmp9 * tmp8) + (tmp7 * tmp10); + } + tmp52 = K866025403 * (tmp50 - tmp51); + tmp98 = tmp50 + tmp51; + tmp105 = tmp99 - (K500000000 * tmp98); + tmp104 = K866025403 * (tmp11 - tmp6); + tmp12 = tmp6 + tmp11; + tmp49 = tmp1 - (K500000000 * tmp12); + } + { + fftw_real tmp35; + fftw_real tmp71; + fftw_real tmp40; + fftw_real tmp66; + fftw_real tmp45; + fftw_real tmp67; + fftw_real tmp46; + fftw_real tmp72; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp32; + fftw_real tmp34; + fftw_real tmp31; + fftw_real tmp33; + ASSERT_ALIGNED_DOUBLE; + tmp32 = c_re(inout[2 * iostride]); + tmp34 = c_im(inout[2 * iostride]); + tmp31 = c_re(W[1]); + tmp33 = c_im(W[1]); + tmp35 = (tmp31 * tmp32) - (tmp33 * tmp34); + tmp71 = (tmp33 * tmp32) + (tmp31 * tmp34); + } + { + fftw_real tmp37; + fftw_real tmp39; + fftw_real tmp36; + fftw_real tmp38; + ASSERT_ALIGNED_DOUBLE; + tmp37 = c_re(inout[5 * iostride]); + tmp39 = c_im(inout[5 * iostride]); + tmp36 = 
c_re(W[4]); + tmp38 = c_im(W[4]); + tmp40 = (tmp36 * tmp37) - (tmp38 * tmp39); + tmp66 = (tmp38 * tmp37) + (tmp36 * tmp39); + } + { + fftw_real tmp42; + fftw_real tmp44; + fftw_real tmp41; + fftw_real tmp43; + ASSERT_ALIGNED_DOUBLE; + tmp42 = c_re(inout[8 * iostride]); + tmp44 = c_im(inout[8 * iostride]); + tmp41 = c_re(W[7]); + tmp43 = c_im(W[7]); + tmp45 = (tmp41 * tmp42) - (tmp43 * tmp44); + tmp67 = (tmp43 * tmp42) + (tmp41 * tmp44); + } + tmp46 = tmp40 + tmp45; + tmp72 = tmp66 + tmp67; + { + fftw_real tmp65; + fftw_real tmp68; + fftw_real tmp70; + fftw_real tmp73; + ASSERT_ALIGNED_DOUBLE; + tmp47 = tmp35 + tmp46; + tmp65 = tmp35 - (K500000000 * tmp46); + tmp68 = K866025403 * (tmp66 - tmp67); + tmp69 = tmp65 + tmp68; + tmp86 = tmp65 - tmp68; + tmp95 = tmp71 + tmp72; + tmp70 = K866025403 * (tmp45 - tmp40); + tmp73 = tmp71 - (K500000000 * tmp72); + tmp74 = tmp70 + tmp73; + tmp85 = tmp73 - tmp70; + } + } + { + fftw_real tmp18; + fftw_real tmp60; + fftw_real tmp23; + fftw_real tmp55; + fftw_real tmp28; + fftw_real tmp56; + fftw_real tmp29; + fftw_real tmp61; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp15; + fftw_real tmp17; + fftw_real tmp14; + fftw_real tmp16; + ASSERT_ALIGNED_DOUBLE; + tmp15 = c_re(inout[iostride]); + tmp17 = c_im(inout[iostride]); + tmp14 = c_re(W[0]); + tmp16 = c_im(W[0]); + tmp18 = (tmp14 * tmp15) - (tmp16 * tmp17); + tmp60 = (tmp16 * tmp15) + (tmp14 * tmp17); + } + { + fftw_real tmp20; + fftw_real tmp22; + fftw_real tmp19; + fftw_real tmp21; + ASSERT_ALIGNED_DOUBLE; + tmp20 = c_re(inout[4 * iostride]); + tmp22 = c_im(inout[4 * iostride]); + tmp19 = c_re(W[3]); + tmp21 = c_im(W[3]); + tmp23 = (tmp19 * tmp20) - (tmp21 * tmp22); + tmp55 = (tmp21 * tmp20) + (tmp19 * tmp22); + } + { + fftw_real tmp25; + fftw_real tmp27; + fftw_real tmp24; + fftw_real tmp26; + ASSERT_ALIGNED_DOUBLE; + tmp25 = c_re(inout[7 * iostride]); + tmp27 = c_im(inout[7 * iostride]); + tmp24 = c_re(W[6]); + tmp26 = c_im(W[6]); + tmp28 = (tmp24 * tmp25) - (tmp26 * tmp27); + 
tmp56 = (tmp26 * tmp25) + (tmp24 * tmp27); + } + tmp29 = tmp23 + tmp28; + tmp61 = tmp55 + tmp56; + { + fftw_real tmp54; + fftw_real tmp57; + fftw_real tmp59; + fftw_real tmp62; + ASSERT_ALIGNED_DOUBLE; + tmp30 = tmp18 + tmp29; + tmp54 = tmp18 - (K500000000 * tmp29); + tmp57 = K866025403 * (tmp55 - tmp56); + tmp58 = tmp54 + tmp57; + tmp82 = tmp54 - tmp57; + tmp94 = tmp60 + tmp61; + tmp59 = K866025403 * (tmp28 - tmp23); + tmp62 = tmp60 - (K500000000 * tmp61); + tmp63 = tmp59 + tmp62; + tmp83 = tmp62 - tmp59; + } + } + { + fftw_real tmp96; + fftw_real tmp13; + fftw_real tmp48; + fftw_real tmp93; + ASSERT_ALIGNED_DOUBLE; + tmp96 = K866025403 * (tmp94 - tmp95); + tmp13 = tmp1 + tmp12; + tmp48 = tmp30 + tmp47; + tmp93 = tmp13 - (K500000000 * tmp48); + c_re(inout[0]) = tmp13 + tmp48; + c_re(inout[3 * iostride]) = tmp93 + tmp96; + c_re(inout[6 * iostride]) = tmp93 - tmp96; + } + { + fftw_real tmp101; + fftw_real tmp97; + fftw_real tmp100; + fftw_real tmp102; + ASSERT_ALIGNED_DOUBLE; + tmp101 = K866025403 * (tmp47 - tmp30); + tmp97 = tmp94 + tmp95; + tmp100 = tmp98 + tmp99; + tmp102 = tmp100 - (K500000000 * tmp97); + c_im(inout[0]) = tmp97 + tmp100; + c_im(inout[6 * iostride]) = tmp102 - tmp101; + c_im(inout[3 * iostride]) = tmp101 + tmp102; + } + { + fftw_real tmp53; + fftw_real tmp106; + fftw_real tmp76; + fftw_real tmp107; + fftw_real tmp80; + fftw_real tmp103; + fftw_real tmp77; + fftw_real tmp108; + ASSERT_ALIGNED_DOUBLE; + tmp53 = tmp49 + tmp52; + tmp106 = tmp104 + tmp105; + { + fftw_real tmp64; + fftw_real tmp75; + fftw_real tmp78; + fftw_real tmp79; + ASSERT_ALIGNED_DOUBLE; + tmp64 = (K766044443 * tmp58) + (K642787609 * tmp63); + tmp75 = (K173648177 * tmp69) + (K984807753 * tmp74); + tmp76 = tmp64 + tmp75; + tmp107 = K866025403 * (tmp75 - tmp64); + tmp78 = (K766044443 * tmp63) - (K642787609 * tmp58); + tmp79 = (K173648177 * tmp74) - (K984807753 * tmp69); + tmp80 = K866025403 * (tmp78 - tmp79); + tmp103 = tmp78 + tmp79; + } + c_re(inout[iostride]) = tmp53 + tmp76; + 
tmp77 = tmp53 - (K500000000 * tmp76); + c_re(inout[7 * iostride]) = tmp77 - tmp80; + c_re(inout[4 * iostride]) = tmp77 + tmp80; + c_im(inout[iostride]) = tmp103 + tmp106; + tmp108 = tmp106 - (K500000000 * tmp103); + c_im(inout[4 * iostride]) = tmp107 + tmp108; + c_im(inout[7 * iostride]) = tmp108 - tmp107; + } + { + fftw_real tmp81; + fftw_real tmp110; + fftw_real tmp88; + fftw_real tmp111; + fftw_real tmp92; + fftw_real tmp109; + fftw_real tmp89; + fftw_real tmp112; + ASSERT_ALIGNED_DOUBLE; + tmp81 = tmp49 - tmp52; + tmp110 = tmp105 - tmp104; + { + fftw_real tmp84; + fftw_real tmp87; + fftw_real tmp90; + fftw_real tmp91; + ASSERT_ALIGNED_DOUBLE; + tmp84 = (K173648177 * tmp82) + (K984807753 * tmp83); + tmp87 = (K342020143 * tmp85) - (K939692620 * tmp86); + tmp88 = tmp84 + tmp87; + tmp111 = K866025403 * (tmp87 - tmp84); + tmp90 = (K173648177 * tmp83) - (K984807753 * tmp82); + tmp91 = (K342020143 * tmp86) + (K939692620 * tmp85); + tmp92 = K866025403 * (tmp90 + tmp91); + tmp109 = tmp90 - tmp91; + } + c_re(inout[2 * iostride]) = tmp81 + tmp88; + tmp89 = tmp81 - (K500000000 * tmp88); + c_re(inout[8 * iostride]) = tmp89 - tmp92; + c_re(inout[5 * iostride]) = tmp89 + tmp92; + c_im(inout[2 * iostride]) = tmp109 + tmp110; + tmp112 = tmp110 - (K500000000 * tmp109); + c_im(inout[5 * iostride]) = tmp111 + tmp112; + c_im(inout[8 * iostride]) = tmp112 - tmp111; + } + } +} + +static const int twiddle_order[] = { 1, 2, 3, 4, 5, 6, 7, 8 }; +/* Descriptor record naming this codelet; config.c takes its address through TWIDDLE_CODELET(9). */ +fftw_codelet_desc fftw_twiddle_9_desc = { + "fftw_twiddle_9", + (void (*)()) fftw_twiddle_9, + 9, + FFTW_FORWARD, + FFTW_TWIDDLE, + 198, + 8, + twiddle_order, +}; diff --git a/src/fftw/ftwi_10.c b/src/fftw/ftwi_10.c new file mode 100644 index 0000000..cd8da09 --- /dev/null +++ b/src/fftw/ftwi_10.c @@ -0,0 +1,378 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the 
Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:08:33 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -twiddleinv 10 */
+
+/*
+ * This function contains 102 FP additions, 60 FP multiplications,
+ * (or, 72 additions, 30 multiplications, 30 fused multiply/add),
+ * 42 stack variables, and 40 memory accesses
+ */
+static const fftw_real K951056516 =
+FFTW_KONST(+0.951056516295153572116439333379382143405698634); /* sin(2*pi/5) */
+static const fftw_real K587785252 =
+FFTW_KONST(+0.587785252292473129168705954639072768597652438); /* sin(pi/5) */
+static const fftw_real K250000000 =
+FFTW_KONST(+0.250000000000000000000000000000000000000000000); /* 1/4 */
+static const fftw_real K559016994 =
+FFTW_KONST(+0.559016994374947424102293417182819058860154590); /* sqrt(5)/4 */
+
+/*
+ * Generator Id's :
+ * $Id: ftwi_10.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: ftwi_10.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: ftwi_10.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ *
+ * fftwi_twiddle_10 (defined next): in-place 10-point twiddle butterfly,
+ * backward direction (generated with -twiddleinv 10 and registered in
+ * fftwi_twiddle_10_desc as FFTW_BACKWARD / FFTW_TWIDDLE).
+ *
+ *   A        - base of the complex data; results overwrite the inputs
+ *   W        - twiddle factors; W[0]..W[8] are read on each pass and W
+ *              is advanced by 9 afterwards
+ *   iostride - element distance between the 10 points of one butterfly
+ *   m        - number of passes (butterflies) to perform
+ *   dist     - element distance from one butterfly to the next
+ *
+ * Each pass reads inout[k * iostride] for k = 0..9, multiplies point k
+ * (k = 1..9) by the complex conjugate of W[k - 1], combines the values
+ * and writes the ten results back to the same ten locations.  All ten
+ * points are loaded before the first store of the pass.
+ */
+
+void fftwi_twiddle_10(fftw_complex *A, const fftw_complex *W, int iostride,
+ int m, int dist)
+{
+     int i;
+     fftw_complex *inout;
+     inout = A;
+     for (i = m; i > 0; i = i - 1, inout = inout + dist, W = W + 9) { /* one butterfly per pass */
+          fftw_real tmp7;
+          fftw_real tmp55;
+          fftw_real tmp100;
+          fftw_real tmp115;
+          fftw_real tmp41;
+          fftw_real tmp52;
+          fftw_real tmp53;
+          fftw_real tmp59;
+          fftw_real tmp60;
+          fftw_real tmp61;
+          fftw_real tmp75;
+          fftw_real tmp78;
+          fftw_real tmp113;
+          fftw_real tmp89;
+          fftw_real tmp90;
+          fftw_real tmp96;
+          fftw_real tmp18;
+          fftw_real tmp29;
+          fftw_real tmp30;
+          fftw_real tmp56;
+          fftw_real tmp57;
+          fftw_real tmp58;
+          fftw_real tmp68;
+          fftw_real tmp71;
+          fftw_real tmp112;
+          fftw_real tmp86;
+          fftw_real tmp87;
+          fftw_real tmp95;
+          ASSERT_ALIGNED_DOUBLE;
+          { /* load point 0 and twiddled point 5 */
+               fftw_real tmp1;
+               fftw_real tmp99;
+               fftw_real tmp6;
+               fftw_real tmp98;
+               ASSERT_ALIGNED_DOUBLE;
+               tmp1 = c_re(inout[0]);
+               tmp99 = c_im(inout[0]);
+               {
+                    fftw_real tmp3;
+                    fftw_real tmp5;
+                    fftw_real tmp2;
+                    fftw_real tmp4;
+                    ASSERT_ALIGNED_DOUBLE;
+                    tmp3 = c_re(inout[5 * iostride]);
+                    tmp5 = c_im(inout[5 * iostride]);
+                    tmp2 = c_re(W[4]);
+                    tmp4 = c_im(W[4]);
+                    tmp6 = (tmp2 * tmp3) + (tmp4 * tmp5); /* real part of point * conj(W) */
+                    tmp98 = (tmp2 * tmp5) - (tmp4 * tmp3); /* imaginary part of point * conj(W) */
+               }
+               tmp7 = tmp1 - tmp6;
+               tmp55 = tmp1 + tmp6;
+               tmp100 = tmp98 + tmp99;
+               tmp115 = tmp99 - tmp98;
+          }
+          { /* load twiddled points 4, 1, 9 and 6 */
+               fftw_real tmp35;
+               fftw_real tmp73;
+               fftw_real tmp51;
+               fftw_real tmp77;
+               fftw_real tmp40;
+               fftw_real tmp74;
+               fftw_real tmp46;
+               fftw_real tmp76;
+               ASSERT_ALIGNED_DOUBLE;
+               {
+                    fftw_real tmp32;
+                    fftw_real tmp34;
+                    fftw_real tmp31;
+                    fftw_real tmp33;
+                    ASSERT_ALIGNED_DOUBLE;
+                    tmp32 = c_re(inout[4 * iostride]);
+                    tmp34 = c_im(inout[4 * iostride]);
+                    tmp31 = c_re(W[3]);
+                    tmp33 = c_im(W[3]);
+                    tmp35 = (tmp31 * tmp32) + (tmp33 * tmp34);
+                    tmp73 = (tmp31 * tmp34) - (tmp33 * tmp32);
+               }
+               {
+                    fftw_real tmp48;
+                    fftw_real tmp50;
+                    fftw_real tmp47;
+                    fftw_real tmp49;
+                    ASSERT_ALIGNED_DOUBLE;
+                    tmp48 = c_re(inout[iostride]);
+                    tmp50 = c_im(inout[iostride]);
+                    tmp47 = c_re(W[0]);
+                    tmp49 = c_im(W[0]);
+                    tmp51 = (tmp47 * tmp48) + (tmp49 * tmp50);
+                    tmp77 = (tmp47 * tmp50) - (tmp49 * tmp48);
+               }
+               {
+                    fftw_real tmp37;
+                    fftw_real tmp39;
+                    fftw_real tmp36;
+                    fftw_real tmp38;
+                    ASSERT_ALIGNED_DOUBLE;
+                    tmp37 = c_re(inout[9 * iostride]);
+                    tmp39 = c_im(inout[9 * iostride]);
+                    tmp36 = c_re(W[8]);
+                    tmp38 = c_im(W[8]);
+                    tmp40 = (tmp36 * tmp37) + (tmp38 * tmp39);
+                    tmp74 = (tmp36 * tmp39) - (tmp38 * tmp37);
+               }
+               {
+                    fftw_real tmp43;
+                    fftw_real tmp45;
+                    fftw_real tmp42;
+                    fftw_real tmp44;
+                    ASSERT_ALIGNED_DOUBLE;
+                    tmp43 = c_re(inout[6 * iostride]);
+                    tmp45 = c_im(inout[6 * iostride]);
+                    tmp42 = c_re(W[5]);
+                    tmp44 = c_im(W[5]);
+                    tmp46 = (tmp42 * tmp43) + (tmp44 * tmp45);
+                    tmp76 = (tmp42 * tmp45) - (tmp44 * tmp43);
+               }
+               tmp41 = tmp35 - tmp40;
+               tmp52 = tmp46 - tmp51;
+               tmp53 = tmp41 + tmp52;
+               tmp59 = tmp35 + tmp40;
+               tmp60 = tmp46 + tmp51;
+               tmp61 = tmp59 + tmp60;
+               tmp75 = tmp73 - tmp74;
+               tmp78 = tmp76 - tmp77;
+               tmp113 = tmp75 + tmp78;
+               tmp89 = tmp73 + tmp74;
+               tmp90 = tmp76 + tmp77;
+               tmp96 = tmp89 + tmp90;
+          }
+          { /* load twiddled points 2, 3, 7 and 8 */
+               fftw_real tmp12;
+               fftw_real tmp66;
+               fftw_real tmp28;
+               fftw_real tmp70;
+               fftw_real tmp17;
+               fftw_real tmp67;
+               fftw_real tmp23;
+               fftw_real tmp69;
+               ASSERT_ALIGNED_DOUBLE;
+               {
+                    fftw_real tmp9;
+                    fftw_real tmp11;
+                    fftw_real tmp8;
+                    fftw_real tmp10;
+                    ASSERT_ALIGNED_DOUBLE;
+                    tmp9 = c_re(inout[2 * iostride]);
+                    tmp11 = c_im(inout[2 * iostride]);
+                    tmp8 = c_re(W[1]);
+                    tmp10 = c_im(W[1]);
+                    tmp12 = (tmp8 * tmp9) + (tmp10 * tmp11);
+                    tmp66 = (tmp8 * tmp11) - (tmp10 * tmp9);
+               }
+               {
+                    fftw_real tmp25;
+                    fftw_real tmp27;
+                    fftw_real tmp24;
+                    fftw_real tmp26;
+                    ASSERT_ALIGNED_DOUBLE;
+                    tmp25 = c_re(inout[3 * iostride]);
+                    tmp27 = c_im(inout[3 * iostride]);
+                    tmp24 = c_re(W[2]);
+                    tmp26 = c_im(W[2]);
+                    tmp28 = (tmp24 * tmp25) + (tmp26 * tmp27);
+                    tmp70 = (tmp24 * tmp27) - (tmp26 * tmp25);
+               }
+               {
+                    fftw_real tmp14;
+                    fftw_real tmp16;
+                    fftw_real tmp13;
+                    fftw_real tmp15;
+                    ASSERT_ALIGNED_DOUBLE;
+                    tmp14 = c_re(inout[7 * iostride]);
+                    tmp16 = c_im(inout[7 * iostride]);
+                    tmp13 = c_re(W[6]);
+                    tmp15 = c_im(W[6]);
+                    tmp17 = (tmp13 * tmp14) + (tmp15 * tmp16);
+                    tmp67 = (tmp13 * tmp16) - (tmp15 * tmp14);
+               }
+               {
+                    fftw_real tmp20;
+                    fftw_real tmp22;
+                    fftw_real tmp19;
+                    fftw_real tmp21;
+                    ASSERT_ALIGNED_DOUBLE;
+                    tmp20 = c_re(inout[8 * iostride]);
+                    tmp22 = c_im(inout[8 * iostride]);
+                    tmp19 = c_re(W[7]);
+                    tmp21 = c_im(W[7]);
+                    tmp23 = (tmp19 * tmp20) + (tmp21 * tmp22);
+                    tmp69 = (tmp19 * tmp22) - (tmp21 * tmp20);
+               }
+               tmp18 = tmp12 - tmp17;
+               tmp29 = tmp23 - tmp28;
+               tmp30 = tmp18 + tmp29;
+               tmp56 = tmp12 + tmp17;
+               tmp57 = tmp23 + tmp28;
+               tmp58 = tmp56 + tmp57;
+               tmp68 = tmp66 - tmp67;
+               tmp71 = tmp69 - tmp70;
+               tmp112 = tmp68 + tmp71;
+               tmp86 = tmp66 + tmp67;
+               tmp87 = tmp69 + tmp70;
+               tmp95 = tmp86 + tmp87;
+          }
+          { /* store real parts of outputs 5, 1, 9, 7 and 3 */
+               fftw_real tmp64;
+               fftw_real tmp54;
+               fftw_real tmp63;
+               fftw_real tmp80;
+               fftw_real tmp82;
+               fftw_real tmp72;
+               fftw_real tmp79;
+               fftw_real tmp81;
+               fftw_real tmp65;
+               ASSERT_ALIGNED_DOUBLE;
+               tmp64 = K559016994 * (tmp30 - tmp53);
+               tmp54 = tmp30 + tmp53;
+               tmp63 = tmp7 - (K250000000 * tmp54);
+               tmp72 = tmp68 - tmp71;
+               tmp79 = tmp75 - tmp78;
+               tmp80 = (K587785252 * tmp72) - (K951056516 * tmp79);
+               tmp82 = (K951056516 * tmp72) + (K587785252 * tmp79);
+               c_re(inout[5 * iostride]) = tmp7 + tmp54;
+               tmp81 = tmp64 + tmp63;
+               c_re(inout[iostride]) = tmp81 - tmp82;
+               c_re(inout[9 * iostride]) = tmp81 + tmp82;
+               tmp65 = tmp63 - tmp64;
+               c_re(inout[7 * iostride]) = tmp65 - tmp80;
+               c_re(inout[3 * iostride]) = tmp65 + tmp80;
+          }
+          { /* store imaginary parts of outputs 5, 3, 7, 1 and 9 */
+               fftw_real tmp114;
+               fftw_real tmp116;
+               fftw_real tmp117;
+               fftw_real tmp111;
+               fftw_real tmp120;
+               fftw_real tmp109;
+               fftw_real tmp110;
+               fftw_real tmp119;
+               fftw_real tmp118;
+               ASSERT_ALIGNED_DOUBLE;
+               tmp114 = K559016994 * (tmp112 - tmp113);
+               tmp116 = tmp112 + tmp113;
+               tmp117 = tmp115 - (K250000000 * tmp116);
+               tmp109 = tmp18 - tmp29;
+               tmp110 = tmp41 - tmp52;
+               tmp111 = (K951056516 * tmp109) + (K587785252 * tmp110);
+               tmp120 = (K587785252 * tmp109) - (K951056516 * tmp110);
+               c_im(inout[5 * iostride]) = tmp116 + tmp115;
+               tmp119 = tmp117 - tmp114;
+               c_im(inout[3 * iostride]) = tmp119 - tmp120;
+               c_im(inout[7 * iostride]) = tmp120 + tmp119;
+               tmp118 = tmp114 + tmp117;
+               c_im(inout[iostride]) = tmp111 + tmp118;
+               c_im(inout[9 * iostride]) = tmp118 - tmp111;
+          }
+          { /* store real parts of outputs 0, 6, 4, 2 and 8 */
+               fftw_real tmp84;
+               fftw_real tmp62;
+               fftw_real tmp83;
+               fftw_real tmp92;
+               fftw_real tmp94;
+               fftw_real tmp88;
+               fftw_real tmp91;
+               fftw_real tmp93;
+               fftw_real tmp85;
+               ASSERT_ALIGNED_DOUBLE;
+               tmp84 = K559016994 * (tmp58 - tmp61);
+               tmp62 = tmp58 + tmp61;
+               tmp83 = tmp55 - (K250000000 * tmp62);
+               tmp88 = tmp86 - tmp87;
+               tmp91 = tmp89 - tmp90;
+               tmp92 = (K587785252 * tmp88) - (K951056516 * tmp91);
+               tmp94 = (K951056516 * tmp88) + (K587785252 * tmp91);
+               c_re(inout[0]) = tmp55 + tmp62;
+               tmp93 = tmp84 + tmp83;
+               c_re(inout[6 * iostride]) = tmp93 - tmp94;
+               c_re(inout[4 * iostride]) = tmp93 + tmp94;
+               tmp85 = tmp83 - tmp84;
+               c_re(inout[2 * iostride]) = tmp85 - tmp92;
+               c_re(inout[8 * iostride]) = tmp85 + tmp92;
+          }
+          { /* store imaginary parts of outputs 0, 4, 6, 2 and 8 */
+               fftw_real tmp105;
+               fftw_real tmp97;
+               fftw_real tmp104;
+               fftw_real tmp103;
+               fftw_real tmp108;
+               fftw_real tmp101;
+               fftw_real tmp102;
+               fftw_real tmp107;
+               fftw_real tmp106;
+               ASSERT_ALIGNED_DOUBLE;
+               tmp105 = K559016994 * (tmp95 - tmp96);
+               tmp97 = tmp95 + tmp96;
+               tmp104 = tmp100 - (K250000000 * tmp97);
+               tmp101 = tmp56 - tmp57;
+               tmp102 = tmp59 - tmp60;
+               tmp103 = (K587785252 * tmp101) - (K951056516 * tmp102);
+               tmp108 = (K951056516 * tmp101) + (K587785252 * tmp102);
+               c_im(inout[0]) = tmp97 + tmp100;
+               tmp107 = tmp105 + tmp104;
+               c_im(inout[4 * iostride]) = tmp107 - tmp108;
+               c_im(inout[6 * iostride]) = tmp108 + tmp107;
+               tmp106 = tmp104 - tmp105;
+               c_im(inout[2 * iostride]) = tmp103 + tmp106;
+               c_im(inout[8 * iostride]) = tmp106 - tmp103;
+          }
+     }
+}
+
+static const int twiddle_order[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9 };
+fftw_codelet_desc fftwi_twiddle_10_desc = { /* field layout comes from fftw_codelet_desc in fftw-int.h, not visible here */
+     "fftwi_twiddle_10",
+     (void (*)()) fftwi_twiddle_10,
+     10,
+     FFTW_BACKWARD,
+     FFTW_TWIDDLE,
+     231, /* NOTE(review): meaning of this value is not visible here -- confirm against fftw-int.h */
+     9, /* presumably the twiddle count; matches W = W + 9 in the loop -- confirm */
+     twiddle_order,
+};
diff --git a/src/fftw/ftwi_16.c b/src/fftw/ftwi_16.c
new file mode 100644
index 0000000..da369de
--- /dev/null
+++ b/src/fftw/ftwi_16.c
@@ -0,0 +1,614 @@
+/*
+ * Copyright (c) 1997-1999,
2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:08:35 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -twiddleinv 16 */
+
+/*
+ * This function contains 174 FP additions, 84 FP multiplications,
+ * (or, 136 additions, 46 multiplications, 38 fused multiply/add),
+ * 50 stack variables, and 64 memory accesses
+ */
+static const fftw_real K382683432 =
+FFTW_KONST(+0.382683432365089771728459984030398866761344562); /* sin(pi/8) */
+static const fftw_real K923879532 =
+FFTW_KONST(+0.923879532511286756128183189396788286822416626); /* cos(pi/8) */
+static const fftw_real K707106781 =
+FFTW_KONST(+0.707106781186547524400844362104849039284835938); /* sqrt(2)/2 */
+
+/*
+ * Generator Id's :
+ * $Id: ftwi_16.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: ftwi_16.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: ftwi_16.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ *
+ * fftwi_twiddle_16 (defined next): in-place 16-point twiddle butterfly,
+ * backward direction (generated with -twiddleinv 16 and registered in
+ * fftwi_twiddle_16_desc as FFTW_BACKWARD / FFTW_TWIDDLE).
+ *
+ *   A        - base of the complex data; results overwrite the inputs
+ *   W        - twiddle factors; W[0]..W[14] are read on each pass and W
+ *              is advanced by 15 afterwards
+ *   iostride - element distance between the 16 points of one butterfly
+ *   m        - number of passes (butterflies) to perform
+ *   dist     - element distance from one butterfly to the next
+ *
+ * Each pass reads inout[k * iostride] for k = 0..15, multiplies point k
+ * (k = 1..15) by the complex conjugate of W[k - 1], combines the values
+ * and writes the sixteen results back to the same locations.  All
+ * sixteen points are loaded before the first store of the pass.
+ */
+
+void fftwi_twiddle_16(fftw_complex *A, const fftw_complex *W, int iostride,
+ int m, int dist)
+{
+     int i;
+     fftw_complex *inout;
+     inout = A;
+     for (i = m; i > 0; i = i - 1, inout = inout + dist, W = W + 15) { /* one butterfly per pass */
+          fftw_real tmp7;
+          fftw_real tmp91;
+          fftw_real tmp180;
+          fftw_real tmp194;
+          fftw_real tmp18;
+          fftw_real tmp193;
+          fftw_real tmp94;
+          fftw_real tmp177;
+          fftw_real tmp77;
+          fftw_real tmp88;
+          fftw_real tmp161;
+          fftw_real tmp117;
+          fftw_real tmp141;
+          fftw_real tmp162;
+          fftw_real tmp163;
+          fftw_real tmp164;
+          fftw_real tmp112;
+          fftw_real tmp140;
+          fftw_real tmp30;
+          fftw_real tmp153;
+          fftw_real tmp100;
+          fftw_real tmp137;
+          fftw_real tmp41;
+          fftw_real tmp152;
+          fftw_real tmp105;
+          fftw_real tmp136;
+          fftw_real tmp54;
+          fftw_real tmp65;
+          fftw_real tmp156;
+          fftw_real tmp128;
+          fftw_real tmp144;
+          fftw_real tmp157;
+          fftw_real tmp158;
+          fftw_real tmp159;
+          fftw_real tmp123;
+          fftw_real tmp143;
+          ASSERT_ALIGNED_DOUBLE;
+          { /* load point 0 and twiddled point 8 */
+               fftw_real tmp1;
+               fftw_real tmp179;
+               fftw_real tmp6;
+               fftw_real tmp178;
+               ASSERT_ALIGNED_DOUBLE;
+               tmp1 = c_re(inout[0]);
+               tmp179 = c_im(inout[0]);
+               {
+                    fftw_real tmp3;
+                    fftw_real tmp5;
+                    fftw_real tmp2;
+                    fftw_real tmp4;
+                    ASSERT_ALIGNED_DOUBLE;
+                    tmp3 = c_re(inout[8 * iostride]);
+                    tmp5 = c_im(inout[8 * iostride]);
+                    tmp2 = c_re(W[7]);
+                    tmp4 = c_im(W[7]);
+                    tmp6 = (tmp2 * tmp3) + (tmp4 * tmp5); /* real part of point * conj(W) */
+                    tmp178 = (tmp2 * tmp5) - (tmp4 * tmp3); /* imaginary part of point * conj(W) */
+               }
+               tmp7 = tmp1 + tmp6;
+               tmp91 = tmp1 - tmp6;
+               tmp180 = tmp178 + tmp179;
+               tmp194 = tmp179 - tmp178;
+          }
+          { /* load twiddled points 4 and 12 */
+               fftw_real tmp12;
+               fftw_real tmp92;
+               fftw_real tmp17;
+               fftw_real tmp93;
+               ASSERT_ALIGNED_DOUBLE;
+               {
+                    fftw_real tmp9;
+                    fftw_real tmp11;
+                    fftw_real tmp8;
+                    fftw_real tmp10;
+                    ASSERT_ALIGNED_DOUBLE;
+                    tmp9 = c_re(inout[4 * iostride]);
+                    tmp11 = c_im(inout[4 * iostride]);
+                    tmp8 = c_re(W[3]);
+                    tmp10 = c_im(W[3]);
+                    tmp12 = (tmp8 * tmp9) + (tmp10 * tmp11);
+                    tmp92 = (tmp8 * tmp11) - (tmp10 * tmp9);
+               }
+               {
+                    fftw_real tmp14;
+                    fftw_real tmp16;
+                    fftw_real tmp13;
+                    fftw_real tmp15;
+                    ASSERT_ALIGNED_DOUBLE;
+                    tmp14 = c_re(inout[12 * iostride]);
+                    tmp16 = c_im(inout[12 * iostride]);
+                    tmp13 = c_re(W[11]);
+                    tmp15 = c_im(W[11]);
+                    tmp17 = (tmp13 * tmp14) + (tmp15 * tmp16);
+                    tmp93 = (tmp13 * tmp16) - (tmp15 * tmp14);
+               }
+               tmp18 = tmp12 + tmp17;
+               tmp193 = tmp12 - tmp17;
+               tmp94 = tmp92 - tmp93;
+               tmp177 = tmp92 + tmp93;
+          }
+          { /* load twiddled points 15, 11, 7 and 3 */
+               fftw_real tmp71;
+               fftw_real tmp108;
+               fftw_real tmp87;
+               fftw_real tmp115;
+               fftw_real tmp76;
+               fftw_real tmp109;
+               fftw_real tmp82;
+               fftw_real tmp114;
+               ASSERT_ALIGNED_DOUBLE;
+               {
+                    fftw_real tmp68;
+                    fftw_real tmp70;
+                    fftw_real tmp67;
+                    fftw_real tmp69;
+                    ASSERT_ALIGNED_DOUBLE;
+                    tmp68 = c_re(inout[15 * iostride]);
+                    tmp70 = c_im(inout[15 * iostride]);
+                    tmp67 = c_re(W[14]);
+                    tmp69 = c_im(W[14]);
+                    tmp71 = (tmp67 * tmp68) + (tmp69 * tmp70);
+                    tmp108 = (tmp67 * tmp70) - (tmp69 * tmp68);
+               }
+               {
+                    fftw_real tmp84;
+                    fftw_real tmp86;
+                    fftw_real tmp83;
+                    fftw_real tmp85;
+                    ASSERT_ALIGNED_DOUBLE;
+                    tmp84 = c_re(inout[11 * iostride]);
+                    tmp86 = c_im(inout[11 * iostride]);
+                    tmp83 = c_re(W[10]);
+                    tmp85 = c_im(W[10]);
+                    tmp87 = (tmp83 * tmp84) + (tmp85 * tmp86);
+                    tmp115 = (tmp83 * tmp86) - (tmp85 * tmp84);
+               }
+               {
+                    fftw_real tmp73;
+                    fftw_real tmp75;
+                    fftw_real tmp72;
+                    fftw_real tmp74;
+                    ASSERT_ALIGNED_DOUBLE;
+                    tmp73 = c_re(inout[7 * iostride]);
+                    tmp75 = c_im(inout[7 * iostride]);
+                    tmp72 = c_re(W[6]);
+                    tmp74 = c_im(W[6]);
+                    tmp76 = (tmp72 * tmp73) + (tmp74 * tmp75);
+                    tmp109 = (tmp72 * tmp75) - (tmp74 * tmp73);
+               }
+               {
+                    fftw_real tmp79;
+                    fftw_real tmp81;
+                    fftw_real tmp78;
+                    fftw_real tmp80;
+                    ASSERT_ALIGNED_DOUBLE;
+                    tmp79 = c_re(inout[3 * iostride]);
+                    tmp81 = c_im(inout[3 * iostride]);
+                    tmp78 = c_re(W[2]);
+                    tmp80 = c_im(W[2]);
+                    tmp82 = (tmp78 * tmp79) + (tmp80 * tmp81);
+                    tmp114 = (tmp78 * tmp81) - (tmp80 * tmp79);
+               }
+               {
+                    fftw_real tmp113;
+                    fftw_real tmp116;
+                    fftw_real tmp110;
+                    fftw_real tmp111;
+                    ASSERT_ALIGNED_DOUBLE;
+                    tmp77 = tmp71 + tmp76;
+                    tmp88 = tmp82 + tmp87;
+                    tmp161 = tmp77 - tmp88;
+                    tmp113 = tmp71 - tmp76;
+                    tmp116 = tmp114 - tmp115;
+                    tmp117 = tmp113 - tmp116;
+                    tmp141 = tmp113 + tmp116;
+                    tmp162 = tmp108 + tmp109;
+                    tmp163 = tmp114 + tmp115;
+                    tmp164 = tmp162 - tmp163;
+                    tmp110 = tmp108 - tmp109;
+                    tmp111 = tmp82 - tmp87;
+                    tmp112 = tmp110 + tmp111;
+                    tmp140 = tmp110 - tmp111;
+               }
+          }
+          { /* load twiddled points 2 and 10 */
+               fftw_real tmp24;
+               fftw_real tmp97;
+               fftw_real tmp29;
+               fftw_real tmp98;
+               fftw_real tmp96;
+               fftw_real tmp99;
+               ASSERT_ALIGNED_DOUBLE;
+               {
+                    fftw_real tmp21;
+                    fftw_real tmp23;
+                    fftw_real tmp20;
+                    fftw_real tmp22;
+                    ASSERT_ALIGNED_DOUBLE;
+                    tmp21 = c_re(inout[2 * iostride]);
+                    tmp23 = c_im(inout[2 * iostride]);
+                    tmp20 = c_re(W[1]);
+                    tmp22 = c_im(W[1]);
+                    tmp24 = (tmp20 * tmp21) + (tmp22 * tmp23);
+                    tmp97 = (tmp20 * tmp23) - (tmp22 * tmp21);
+               }
+               {
+                    fftw_real tmp26;
+                    fftw_real tmp28;
+                    fftw_real tmp25;
+                    fftw_real tmp27;
+                    ASSERT_ALIGNED_DOUBLE;
+                    tmp26 = c_re(inout[10 * iostride]);
+                    tmp28 = c_im(inout[10 * iostride]);
+                    tmp25 = c_re(W[9]);
+                    tmp27 = c_im(W[9]);
+                    tmp29 = (tmp25 * tmp26) + (tmp27 * tmp28);
+                    tmp98 = (tmp25 * tmp28) - (tmp27 * tmp26);
+               }
+               tmp30 = tmp24 + tmp29;
+               tmp153 = tmp97 + tmp98;
+               tmp96 = tmp24 - tmp29;
+               tmp99 = tmp97 - tmp98;
+               tmp100 = tmp96 - tmp99;
+               tmp137 = tmp96 + tmp99;
+          }
+          { /* load twiddled points 14 and 6 */
+               fftw_real tmp35;
+               fftw_real tmp102;
+               fftw_real tmp40;
+               fftw_real tmp103;
+               fftw_real tmp101;
+               fftw_real tmp104;
+               ASSERT_ALIGNED_DOUBLE;
+               {
+                    fftw_real tmp32;
+                    fftw_real tmp34;
+                    fftw_real tmp31;
+                    fftw_real tmp33;
+                    ASSERT_ALIGNED_DOUBLE;
+                    tmp32 = c_re(inout[14 * iostride]);
+                    tmp34 = c_im(inout[14 * iostride]);
+                    tmp31 = c_re(W[13]);
+                    tmp33 = c_im(W[13]);
+                    tmp35 = (tmp31 * tmp32) + (tmp33 * tmp34);
+                    tmp102 = (tmp31 * tmp34) - (tmp33 * tmp32);
+               }
+               {
+                    fftw_real tmp37;
+                    fftw_real tmp39;
+                    fftw_real tmp36;
+                    fftw_real tmp38;
+                    ASSERT_ALIGNED_DOUBLE;
+                    tmp37 = c_re(inout[6 * iostride]);
+                    tmp39 = c_im(inout[6 * iostride]);
+                    tmp36 = c_re(W[5]);
+                    tmp38 = c_im(W[5]);
+                    tmp40 = (tmp36 * tmp37) + (tmp38 * tmp39);
+                    tmp103 = (tmp36 * tmp39) - (tmp38 * tmp37);
+               }
+               tmp41 = tmp35 + tmp40;
+               tmp152 = tmp102 + tmp103;
+               tmp101 = tmp35 - tmp40;
+               tmp104 = tmp102 - tmp103;
+               tmp105 = tmp101 + tmp104;
+               tmp136 = tmp104 - tmp101;
+          }
+          { /* load twiddled points 1, 13, 9 and 5 */
+               fftw_real tmp48;
+               fftw_real tmp119;
+               fftw_real tmp64;
+               fftw_real tmp126;
+               fftw_real tmp53;
+               fftw_real tmp120;
+               fftw_real tmp59;
+               fftw_real tmp125;
+               ASSERT_ALIGNED_DOUBLE;
+               {
+                    fftw_real tmp45;
+                    fftw_real tmp47;
+                    fftw_real tmp44;
+                    fftw_real tmp46;
+                    ASSERT_ALIGNED_DOUBLE;
+                    tmp45 = c_re(inout[iostride]);
+                    tmp47 = c_im(inout[iostride]);
+                    tmp44 = c_re(W[0]);
+                    tmp46 = c_im(W[0]);
+                    tmp48 = (tmp44 * tmp45) + (tmp46 * tmp47);
+                    tmp119 = (tmp44 * tmp47) - (tmp46 * tmp45);
+               }
+               {
+                    fftw_real tmp61;
+                    fftw_real tmp63;
+                    fftw_real tmp60;
+                    fftw_real tmp62;
+                    ASSERT_ALIGNED_DOUBLE;
+                    tmp61 = c_re(inout[13 * iostride]);
+                    tmp63 = c_im(inout[13 * iostride]);
+                    tmp60 = c_re(W[12]);
+                    tmp62 = c_im(W[12]);
+                    tmp64 = (tmp60 * tmp61) + (tmp62 * tmp63);
+                    tmp126 = (tmp60 * tmp63) - (tmp62 * tmp61);
+               }
+               {
+                    fftw_real tmp50;
+                    fftw_real tmp52;
+                    fftw_real tmp49;
+                    fftw_real tmp51;
+                    ASSERT_ALIGNED_DOUBLE;
+                    tmp50 = c_re(inout[9 * iostride]);
+                    tmp52 = c_im(inout[9 * iostride]);
+                    tmp49 = c_re(W[8]);
+                    tmp51 = c_im(W[8]);
+                    tmp53 = (tmp49 * tmp50) + (tmp51 * tmp52);
+                    tmp120 = (tmp49 * tmp52) - (tmp51 * tmp50);
+               }
+               {
+                    fftw_real tmp56;
+                    fftw_real tmp58;
+                    fftw_real tmp55;
+                    fftw_real tmp57;
+                    ASSERT_ALIGNED_DOUBLE;
+                    tmp56 = c_re(inout[5 * iostride]);
+                    tmp58 = c_im(inout[5 * iostride]);
+                    tmp55 = c_re(W[4]);
+                    tmp57 = c_im(W[4]);
+                    tmp59 = (tmp55 * tmp56) + (tmp57 * tmp58);
+                    tmp125 = (tmp55 * tmp58) - (tmp57 * tmp56);
+               }
+               {
+                    fftw_real tmp124;
+                    fftw_real tmp127;
+                    fftw_real tmp121;
+                    fftw_real tmp122;
+                    ASSERT_ALIGNED_DOUBLE;
+                    tmp54 = tmp48 + tmp53;
+                    tmp65 = tmp59 + tmp64;
+                    tmp156 = tmp54 - tmp65;
+                    tmp124 = tmp48 - tmp53;
+                    tmp127 = tmp125 - tmp126;
+                    tmp128 = tmp124 - tmp127;
+                    tmp144 = tmp124 + tmp127;
+                    tmp157 = tmp119 + tmp120;
+                    tmp158 = tmp125 + tmp126;
+                    tmp159 = tmp157 - tmp158;
+                    tmp121 = tmp119 - tmp120;
+                    tmp122 = tmp59 - tmp64;
+                    tmp123 = tmp121 + tmp122;
+                    tmp143 = tmp121 - tmp122;
+               }
+          }
+          { /* store outputs 1, 5, 9 and 13 */
+               fftw_real tmp107;
+               fftw_real tmp131;
+               fftw_real tmp196;
+               fftw_real tmp198;
+               fftw_real tmp130;
+               fftw_real tmp191;
+               fftw_real tmp134;
+               fftw_real tmp197;
+               ASSERT_ALIGNED_DOUBLE;
+               {
+                    fftw_real tmp95;
+                    fftw_real tmp106;
+                    fftw_real tmp192;
+                    fftw_real tmp195;
+                    ASSERT_ALIGNED_DOUBLE;
+                    tmp95 = tmp91 - tmp94;
+                    tmp106 = K707106781 * (tmp100 + tmp105);
+                    tmp107 = tmp95 - tmp106;
+                    tmp131 = tmp95 + tmp106;
+                    tmp192 = K707106781 * (tmp137 + tmp136);
+                    tmp195 = tmp193 + tmp194;
+                    tmp196 = tmp192 + tmp195;
+                    tmp198 = tmp195 - tmp192;
+               }
+               {
+                    fftw_real tmp118;
+                    fftw_real tmp129;
+                    fftw_real tmp132;
+                    fftw_real tmp133;
+                    ASSERT_ALIGNED_DOUBLE;
+                    tmp118 = (K923879532 * tmp112) - (K382683432 * tmp117);
+                    tmp129 = (K923879532 * tmp123) + (K382683432 * tmp128);
+                    tmp130 = tmp118 - tmp129;
+                    tmp191 = tmp129 + tmp118;
+                    tmp132 = (K923879532 * tmp128) - (K382683432 * tmp123);
+                    tmp133 = (K382683432 * tmp112) + (K923879532 * tmp117);
+                    tmp134 = tmp132 + tmp133;
+                    tmp197 = tmp132 - tmp133;
+               }
+               c_re(inout[13 * iostride]) = tmp107 - tmp130;
+               c_re(inout[5 * iostride]) = tmp107 + tmp130;
+               c_re(inout[9 * iostride]) = tmp131 - tmp134;
+               c_re(inout[iostride]) = tmp131 + tmp134;
+               c_im(inout[iostride]) = tmp191 + tmp196;
+               c_im(inout[9 * iostride]) = tmp196 - tmp191;
+               c_im(inout[5 * iostride]) = tmp197 + tmp198;
+               c_im(inout[13 * iostride]) = tmp198 - tmp197;
+          }
+          { /* store outputs 3, 7, 11 and 15 */
+               fftw_real tmp139;
+               fftw_real tmp147;
+               fftw_real tmp202;
+               fftw_real tmp204;
+               fftw_real tmp146;
+               fftw_real tmp199;
+               fftw_real tmp150;
+               fftw_real tmp203;
+               ASSERT_ALIGNED_DOUBLE;
+               {
+                    fftw_real tmp135;
+                    fftw_real tmp138;
+                    fftw_real tmp200;
+                    fftw_real tmp201;
+                    ASSERT_ALIGNED_DOUBLE;
+                    tmp135 = tmp91 + tmp94;
+                    tmp138 = K707106781 * (tmp136 - tmp137);
+                    tmp139 = tmp135 - tmp138;
+                    tmp147 = tmp135 + tmp138;
+                    tmp200 = K707106781 * (tmp100 - tmp105);
+                    tmp201 = tmp194 - tmp193;
+                    tmp202 = tmp200 + tmp201;
+                    tmp204 = tmp201 - tmp200;
+               }
+               {
+                    fftw_real tmp142;
+                    fftw_real tmp145;
+                    fftw_real tmp148;
+                    fftw_real tmp149;
+                    ASSERT_ALIGNED_DOUBLE;
+                    tmp142 = (K382683432 * tmp140) - (K923879532 * tmp141);
+                    tmp145 = (K382683432 * tmp143) + (K923879532 * tmp144);
+                    tmp146 = tmp142 - tmp145;
+                    tmp199 = tmp145 + tmp142;
+                    tmp148 = (K382683432 * tmp144) - (K923879532 * tmp143);
+                    tmp149 = (K923879532 * tmp140) + (K382683432 * tmp141);
+                    tmp150 = tmp148 + tmp149;
+                    tmp203 = tmp148 - tmp149;
+               }
+               c_re(inout[15 * iostride]) = tmp139 - tmp146;
+               c_re(inout[7 * iostride]) = tmp139 + tmp146;
+               c_re(inout[11 * iostride]) = tmp147 - tmp150;
+               c_re(inout[3 * iostride]) = tmp147 + tmp150;
+               c_im(inout[3 * iostride]) = tmp199 + tmp202;
+               c_im(inout[11 * iostride]) = tmp202 - tmp199;
+               c_im(inout[7 * iostride]) = tmp203 + tmp204;
+               c_im(inout[15 * iostride]) = tmp204 - tmp203;
+          }
+          { /* store outputs 2, 6, 10 and 14 */
+               fftw_real tmp155;
+               fftw_real tmp167;
+               fftw_real tmp188;
+               fftw_real tmp190;
+               fftw_real tmp166;
+               fftw_real tmp189;
+               fftw_real tmp170;
+               fftw_real tmp185;
+               ASSERT_ALIGNED_DOUBLE;
+               {
+                    fftw_real tmp151;
+                    fftw_real tmp154;
+                    fftw_real tmp186;
+                    fftw_real tmp187;
+                    ASSERT_ALIGNED_DOUBLE;
+                    tmp151 = tmp7 - tmp18;
+                    tmp154 = tmp152 - tmp153;
+                    tmp155 = tmp151 + tmp154;
+                    tmp167 = tmp151 - tmp154;
+                    tmp186 = tmp30 - tmp41;
+                    tmp187 = tmp180 - tmp177;
+                    tmp188 = tmp186 + tmp187;
+                    tmp190 = tmp187 - tmp186;
+               }
+               {
+                    fftw_real tmp160;
+                    fftw_real tmp165;
+                    fftw_real tmp168;
+                    fftw_real tmp169;
+                    ASSERT_ALIGNED_DOUBLE;
+                    tmp160 = tmp156 - tmp159;
+                    tmp165 = tmp161 + tmp164;
+                    tmp166 = K707106781 * (tmp160 + tmp165);
+                    tmp189 = K707106781 * (tmp160 - tmp165);
+                    tmp168 = tmp164 - tmp161;
+                    tmp169 = tmp156 + tmp159;
+                    tmp170 = K707106781 * (tmp168 - tmp169);
+                    tmp185 = K707106781 * (tmp169 + tmp168);
+               }
+               c_re(inout[10 * iostride]) = tmp155 - tmp166;
+               c_re(inout[2 * iostride]) = tmp155 + tmp166;
+               c_re(inout[14 * iostride]) = tmp167 - tmp170;
+               c_re(inout[6 * iostride]) = tmp167 + tmp170;
+               c_im(inout[2 * iostride]) = tmp185 + tmp188;
+               c_im(inout[10 * iostride]) = tmp188 - tmp185;
+               c_im(inout[6 * iostride]) = tmp189 + tmp190;
+               c_im(inout[14 * iostride]) = tmp190 - tmp189;
+          }
+          { /* store outputs 0, 4, 8 and 12 */
+               fftw_real tmp43;
+               fftw_real tmp171;
+               fftw_real tmp182;
+               fftw_real tmp184;
+               fftw_real tmp90;
+               fftw_real tmp183;
+               fftw_real tmp174;
+               fftw_real tmp175;
+               ASSERT_ALIGNED_DOUBLE;
+               {
+                    fftw_real tmp19;
+                    fftw_real tmp42;
+                    fftw_real tmp176;
+                    fftw_real tmp181;
+                    ASSERT_ALIGNED_DOUBLE;
+                    tmp19 = tmp7 + tmp18;
+                    tmp42 = tmp30 + tmp41;
+                    tmp43 = tmp19 + tmp42;
+                    tmp171 = tmp19 - tmp42;
+                    tmp176 = tmp153 + tmp152;
+                    tmp181 = tmp177 + tmp180;
+                    tmp182 = tmp176 + tmp181;
+                    tmp184 = tmp181 - tmp176;
+               }
+               {
+                    fftw_real tmp66;
+                    fftw_real tmp89;
+                    fftw_real tmp172;
+                    fftw_real tmp173;
+                    ASSERT_ALIGNED_DOUBLE;
+                    tmp66 = tmp54 + tmp65;
+                    tmp89 = tmp77 + tmp88;
+                    tmp90 = tmp66 + tmp89;
+                    tmp183 = tmp66 - tmp89;
+                    tmp172 = tmp162 + tmp163;
+                    tmp173 = tmp157 + tmp158;
+                    tmp174 = tmp172 - tmp173;
+                    tmp175 = tmp173 + tmp172;
+               }
+               c_re(inout[8 * iostride]) = tmp43 - tmp90;
+               c_re(inout[0]) = tmp43 + tmp90;
+               c_re(inout[12 * iostride]) = tmp171 - tmp174;
+               c_re(inout[4 * iostride]) = tmp171 + tmp174;
+               c_im(inout[0]) = tmp175 + tmp182;
+               c_im(inout[8 * iostride]) = tmp182 - tmp175;
+               c_im(inout[4 * iostride]) = tmp183 + tmp184;
+               c_im(inout[12 * iostride]) = tmp184 - tmp183;
+          }
+     }
+}
+
+static const int twiddle_order[] =
+ { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
+fftw_codelet_desc fftwi_twiddle_16_desc = { /* field layout comes from fftw_codelet_desc in fftw-int.h, not visible here */
+     "fftwi_twiddle_16",
+     (void (*)()) fftwi_twiddle_16,
+     16,
+     FFTW_BACKWARD,
+     FFTW_TWIDDLE,
+     363, /* NOTE(review): meaning of this value is not visible here -- confirm against fftw-int.h */
+     15, /* presumably the twiddle count; matches W = W + 15 in the loop -- confirm */
+     twiddle_order,
+};
diff --git a/src/fftw/ftwi_2.c b/src/fftw/ftwi_2.c
new file mode 100644
index 0000000..cd0717c
--- /dev/null
+++ b/src/fftw/ftwi_2.c
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Mon Mar 24 02:08:24 EST 2003 */ + +#include "fftw-int.h" +#include "fftw.h" + +/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -twiddleinv 2 */ + +/* + * This function contains 6 FP additions, 4 FP multiplications, + * (or, 4 additions, 2 multiplications, 2 fused multiply/add), + * 10 stack variables, and 8 memory accesses + */ + +/* + * Generator Id's : + * $Id: ftwi_2.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: ftwi_2.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: ftwi_2.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + */ + +void fftwi_twiddle_2(fftw_complex *A, const fftw_complex *W, int iostride, + int m, int dist) +{ + int i; + fftw_complex *inout; + inout = A; + for (i = m; i > 0; i = i - 1, inout = inout + dist, W = W + 1) { + fftw_real tmp1; + fftw_real tmp8; + fftw_real tmp6; + fftw_real tmp7; + ASSERT_ALIGNED_DOUBLE; + tmp1 = c_re(inout[0]); + tmp8 = c_im(inout[0]); + { + fftw_real tmp3; + fftw_real tmp5; + fftw_real tmp2; + fftw_real tmp4; + ASSERT_ALIGNED_DOUBLE; + tmp3 = c_re(inout[iostride]); + tmp5 = c_im(inout[iostride]); + tmp2 = c_re(W[0]); + tmp4 = c_im(W[0]); + tmp6 = (tmp2 * tmp3) + (tmp4 * tmp5); + tmp7 = (tmp2 * tmp5) - (tmp4 * tmp3); + } + c_re(inout[iostride]) = tmp1 - tmp6; + c_re(inout[0]) = tmp1 + tmp6; + c_im(inout[0]) = tmp7 + tmp8; + c_im(inout[iostride]) = tmp8 - tmp7; + } +} + +static const int 
twiddle_order[] = { 1 }; +fftw_codelet_desc fftwi_twiddle_2_desc = { + "fftwi_twiddle_2", + (void (*)()) fftwi_twiddle_2, + 2, + FFTW_BACKWARD, + FFTW_TWIDDLE, + 55, + 1, + twiddle_order, +}; diff --git a/src/fftw/ftwi_3.c b/src/fftw/ftwi_3.c new file mode 100644 index 0000000..45808a3 --- /dev/null +++ b/src/fftw/ftwi_3.c @@ -0,0 +1,121 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Mon Mar 24 02:08:24 EST 2003 */ + +#include "fftw-int.h" +#include "fftw.h" + +/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -twiddleinv 3 */ + +/* + * This function contains 16 FP additions, 12 FP multiplications, + * (or, 10 additions, 6 multiplications, 6 fused multiply/add), + * 14 stack variables, and 12 memory accesses + */ +static const fftw_real K866025403 = +FFTW_KONST(+0.866025403784438646763723170752936183471402627); +static const fftw_real K500000000 = +FFTW_KONST(+0.500000000000000000000000000000000000000000000); + +/* + * Generator Id's : + * $Id: ftwi_3.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: ftwi_3.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * 
$Id: ftwi_3.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + */ + +void fftwi_twiddle_3(fftw_complex *A, const fftw_complex *W, int iostride, + int m, int dist) +{ + int i; + fftw_complex *inout; + inout = A; + for (i = m; i > 0; i = i - 1, inout = inout + dist, W = W + 2) { + fftw_real tmp1; + fftw_real tmp18; + fftw_real tmp6; + fftw_real tmp15; + fftw_real tmp11; + fftw_real tmp14; + fftw_real tmp12; + fftw_real tmp17; + ASSERT_ALIGNED_DOUBLE; + tmp1 = c_re(inout[0]); + tmp18 = c_im(inout[0]); + { + fftw_real tmp3; + fftw_real tmp5; + fftw_real tmp2; + fftw_real tmp4; + ASSERT_ALIGNED_DOUBLE; + tmp3 = c_re(inout[iostride]); + tmp5 = c_im(inout[iostride]); + tmp2 = c_re(W[0]); + tmp4 = c_im(W[0]); + tmp6 = (tmp2 * tmp3) + (tmp4 * tmp5); + tmp15 = (tmp2 * tmp5) - (tmp4 * tmp3); + } + { + fftw_real tmp8; + fftw_real tmp10; + fftw_real tmp7; + fftw_real tmp9; + ASSERT_ALIGNED_DOUBLE; + tmp8 = c_re(inout[2 * iostride]); + tmp10 = c_im(inout[2 * iostride]); + tmp7 = c_re(W[1]); + tmp9 = c_im(W[1]); + tmp11 = (tmp7 * tmp8) + (tmp9 * tmp10); + tmp14 = (tmp7 * tmp10) - (tmp9 * tmp8); + } + tmp12 = tmp6 + tmp11; + tmp17 = tmp15 + tmp14; + { + fftw_real tmp13; + fftw_real tmp16; + fftw_real tmp19; + fftw_real tmp20; + ASSERT_ALIGNED_DOUBLE; + c_re(inout[0]) = tmp1 + tmp12; + tmp13 = tmp1 - (K500000000 * tmp12); + tmp16 = K866025403 * (tmp14 - tmp15); + c_re(inout[2 * iostride]) = tmp13 - tmp16; + c_re(inout[iostride]) = tmp13 + tmp16; + c_im(inout[0]) = tmp17 + tmp18; + tmp19 = K866025403 * (tmp6 - tmp11); + tmp20 = tmp18 - (K500000000 * tmp17); + c_im(inout[iostride]) = tmp19 + tmp20; + c_im(inout[2 * iostride]) = tmp20 - tmp19; + } + } +} + +static const int twiddle_order[] = { 1, 2 }; +fftw_codelet_desc fftwi_twiddle_3_desc = { + "fftwi_twiddle_3", + (void (*)()) fftwi_twiddle_3, + 3, + FFTW_BACKWARD, + FFTW_TWIDDLE, + 77, + 2, + twiddle_order, +}; diff --git a/src/fftw/ftwi_32.c b/src/fftw/ftwi_32.c new file mode 100644 index 0000000..a392746 --- /dev/null +++ 
b/src/fftw/ftwi_32.c @@ -0,0 +1,1398 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Mon Mar 24 02:08:35 EST 2003 */ + +#include "fftw-int.h" +#include "fftw.h" + +/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -twiddleinv 32 */ + +/* + * This function contains 434 FP additions, 208 FP multiplications, + * (or, 340 additions, 114 multiplications, 94 fused multiply/add), + * 90 stack variables, and 128 memory accesses + */ +static const fftw_real K555570233 = +FFTW_KONST(+0.555570233019602224742830813948532874374937191); /* sin(3*pi/16) */ +static const fftw_real K831469612 = +FFTW_KONST(+0.831469612302545237078788377617905756738560812); /* cos(3*pi/16) */ +static const fftw_real K980785280 = +FFTW_KONST(+0.980785280403230449126182236134239036973933731); /* cos(pi/16) */ +static const fftw_real K195090322 = +FFTW_KONST(+0.195090322016128267848284868477022240927691618); /* sin(pi/16) */ +static const fftw_real K923879532 = +FFTW_KONST(+0.923879532511286756128183189396788286822416626); /* cos(pi/8) */ +static const fftw_real K382683432 = +FFTW_KONST(+0.382683432365089771728459984030398866761344562); /* sin(pi/8) */ +static const fftw_real K707106781 = 
+FFTW_KONST(+0.707106781186547524400844362104849039284835938); /* cos(pi/4) = sqrt(2)/2 */ + +/* + * Generator Id's : + * $Id: ftwi_32.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: ftwi_32.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: ftwi_32.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + */ + +/* fftwi_twiddle_32: in-place size-32 twiddle codelet, registered below as FFTW_BACKWARD. For each of the m passes it reads the 32 complex values inout[k * iostride] (k = 0..31), multiplies inputs 1..31 by twiddle factors in the form x * conj(W[k - 1]) (input 0 is used as is), combines them using only additions, subtractions and the K constants above, and stores the 32 results back to the same locations. Parameters: A is the data, overwritten in place; W is the read-only twiddle table, 31 entries consumed per pass; iostride is the element spacing between the 32 points of one transform; m is the number of passes (nothing is done when m <= 0); dist is the element spacing between consecutive transforms. */ void fftwi_twiddle_32(fftw_complex *A, const fftw_complex *W, int iostride, + int m, int dist) +{ + int i; + fftw_complex *inout; + inout = A; + for (i = m; i > 0; i = i - 1, inout = inout + dist, W = W + 31) { /* one transform per pass; inout and W advance together. The temporaries declared next carry partial results from the load blocks to the store blocks further down. */ + fftw_real tmp19; + fftw_real tmp387; + fftw_real tmp472; + fftw_real tmp486; + fftw_real tmp442; + fftw_real tmp456; + fftw_real tmp191; + fftw_real tmp303; + fftw_real tmp161; + fftw_real tmp403; + fftw_real tmp276; + fftw_real tmp316; + fftw_real tmp372; + fftw_real tmp400; + fftw_real tmp259; + fftw_real tmp319; + fftw_real tmp42; + fftw_real tmp455; + fftw_real tmp201; + fftw_real tmp304; + fftw_real tmp390; + fftw_real tmp437; + fftw_real tmp196; + fftw_real tmp305; + fftw_real tmp184; + fftw_real tmp401; + fftw_real tmp375; + fftw_real tmp404; + fftw_real tmp270; + fftw_real tmp317; + fftw_real tmp279; + fftw_real tmp320; + fftw_real tmp66; + fftw_real tmp395; + fftw_real tmp224; + fftw_real tmp312; + fftw_real tmp357; + fftw_real tmp396; + fftw_real tmp219; + fftw_real tmp311; + fftw_real tmp114; + fftw_real tmp410; + fftw_real tmp249; + fftw_real tmp323; + fftw_real tmp363; + fftw_real tmp407; + fftw_real tmp232; + fftw_real tmp326; + fftw_real tmp89; + fftw_real tmp393; + fftw_real tmp213; + fftw_real tmp309; + fftw_real tmp354; + fftw_real tmp392; + fftw_real tmp208; + fftw_real tmp308; + fftw_real tmp137; + fftw_real tmp408; + fftw_real tmp366; + fftw_real tmp411; + fftw_real tmp243; + fftw_real tmp324; + fftw_real tmp252; + fftw_real tmp327; + ASSERT_ALIGNED_DOUBLE; + { /* load inputs 0, 16, 8, 24 (the last three scaled by the conjugates of W[15], W[7], W[23]) and form their sums and differences */ + fftw_real tmp1; + fftw_real tmp440; + fftw_real tmp6; + fftw_real tmp439; + fftw_real tmp12; + fftw_real tmp188; + fftw_real tmp17; + fftw_real tmp189; + ASSERT_ALIGNED_DOUBLE; + tmp1 = c_re(inout[0]); + tmp440 = 
c_im(inout[0]); + { + fftw_real tmp3; + fftw_real tmp5; + fftw_real tmp2; + fftw_real tmp4; + ASSERT_ALIGNED_DOUBLE; + tmp3 = c_re(inout[16 * iostride]); + tmp5 = c_im(inout[16 * iostride]); + tmp2 = c_re(W[15]); + tmp4 = c_im(W[15]); + tmp6 = (tmp2 * tmp3) + (tmp4 * tmp5); + tmp439 = (tmp2 * tmp5) - (tmp4 * tmp3); + } + { + fftw_real tmp9; + fftw_real tmp11; + fftw_real tmp8; + fftw_real tmp10; + ASSERT_ALIGNED_DOUBLE; + tmp9 = c_re(inout[8 * iostride]); + tmp11 = c_im(inout[8 * iostride]); + tmp8 = c_re(W[7]); + tmp10 = c_im(W[7]); + tmp12 = (tmp8 * tmp9) + (tmp10 * tmp11); + tmp188 = (tmp8 * tmp11) - (tmp10 * tmp9); + } + { + fftw_real tmp14; + fftw_real tmp16; + fftw_real tmp13; + fftw_real tmp15; + ASSERT_ALIGNED_DOUBLE; + tmp14 = c_re(inout[24 * iostride]); + tmp16 = c_im(inout[24 * iostride]); + tmp13 = c_re(W[23]); + tmp15 = c_im(W[23]); + tmp17 = (tmp13 * tmp14) + (tmp15 * tmp16); + tmp189 = (tmp13 * tmp16) - (tmp15 * tmp14); + } + { + fftw_real tmp7; + fftw_real tmp18; + fftw_real tmp470; + fftw_real tmp471; + ASSERT_ALIGNED_DOUBLE; + tmp7 = tmp1 + tmp6; + tmp18 = tmp12 + tmp17; + tmp19 = tmp7 + tmp18; + tmp387 = tmp7 - tmp18; + tmp470 = tmp12 - tmp17; + tmp471 = tmp440 - tmp439; + tmp472 = tmp470 + tmp471; + tmp486 = tmp471 - tmp470; + } + { + fftw_real tmp438; + fftw_real tmp441; + fftw_real tmp187; + fftw_real tmp190; + ASSERT_ALIGNED_DOUBLE; + tmp438 = tmp188 + tmp189; + tmp441 = tmp439 + tmp440; + tmp442 = tmp438 + tmp441; + tmp456 = tmp441 - tmp438; + tmp187 = tmp1 - tmp6; + tmp190 = tmp188 - tmp189; + tmp191 = tmp187 - tmp190; + tmp303 = tmp187 + tmp190; + } + } + { /* load inputs 31, 23, 15, 7 (scaled by the conjugates of W[30], W[22], W[14], W[6]) and form their sums and differences */ + fftw_real tmp143; + fftw_real tmp272; + fftw_real tmp159; + fftw_real tmp257; + fftw_real tmp148; + fftw_real tmp273; + fftw_real tmp154; + fftw_real tmp256; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp140; + fftw_real tmp142; + fftw_real tmp139; + fftw_real tmp141; + ASSERT_ALIGNED_DOUBLE; + tmp140 = c_re(inout[31 * iostride]); + tmp142 = c_im(inout[31 * iostride]); + tmp139 = 
c_re(W[30]); + tmp141 = c_im(W[30]); + tmp143 = (tmp139 * tmp140) + (tmp141 * tmp142); + tmp272 = (tmp139 * tmp142) - (tmp141 * tmp140); + } + { + fftw_real tmp156; + fftw_real tmp158; + fftw_real tmp155; + fftw_real tmp157; + ASSERT_ALIGNED_DOUBLE; + tmp156 = c_re(inout[23 * iostride]); + tmp158 = c_im(inout[23 * iostride]); + tmp155 = c_re(W[22]); + tmp157 = c_im(W[22]); + tmp159 = (tmp155 * tmp156) + (tmp157 * tmp158); + tmp257 = (tmp155 * tmp158) - (tmp157 * tmp156); + } + { + fftw_real tmp145; + fftw_real tmp147; + fftw_real tmp144; + fftw_real tmp146; + ASSERT_ALIGNED_DOUBLE; + tmp145 = c_re(inout[15 * iostride]); + tmp147 = c_im(inout[15 * iostride]); + tmp144 = c_re(W[14]); + tmp146 = c_im(W[14]); + tmp148 = (tmp144 * tmp145) + (tmp146 * tmp147); + tmp273 = (tmp144 * tmp147) - (tmp146 * tmp145); + } + { + fftw_real tmp151; + fftw_real tmp153; + fftw_real tmp150; + fftw_real tmp152; + ASSERT_ALIGNED_DOUBLE; + tmp151 = c_re(inout[7 * iostride]); + tmp153 = c_im(inout[7 * iostride]); + tmp150 = c_re(W[6]); + tmp152 = c_im(W[6]); + tmp154 = (tmp150 * tmp151) + (tmp152 * tmp153); + tmp256 = (tmp150 * tmp153) - (tmp152 * tmp151); + } + { + fftw_real tmp149; + fftw_real tmp160; + fftw_real tmp274; + fftw_real tmp275; + ASSERT_ALIGNED_DOUBLE; + tmp149 = tmp143 + tmp148; + tmp160 = tmp154 + tmp159; + tmp161 = tmp149 + tmp160; + tmp403 = tmp149 - tmp160; + tmp274 = tmp272 - tmp273; + tmp275 = tmp154 - tmp159; + tmp276 = tmp274 + tmp275; + tmp316 = tmp274 - tmp275; + } + { + fftw_real tmp370; + fftw_real tmp371; + fftw_real tmp255; + fftw_real tmp258; + ASSERT_ALIGNED_DOUBLE; + tmp370 = tmp272 + tmp273; + tmp371 = tmp256 + tmp257; + tmp372 = tmp370 + tmp371; + tmp400 = tmp370 - tmp371; + tmp255 = tmp143 - tmp148; + tmp258 = tmp256 - tmp257; + tmp259 = tmp255 - tmp258; + tmp319 = tmp255 + tmp258; + } + } + { /* load inputs 4, 12, 20, 28 (scaled by the conjugates of W[3], W[11], W[19], W[27]) and form their sums and differences */ + fftw_real tmp24; + fftw_real tmp193; + fftw_real tmp40; + fftw_real tmp199; + fftw_real tmp29; + fftw_real tmp194; + fftw_real tmp35; + fftw_real tmp198; + 
ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp21; + fftw_real tmp23; + fftw_real tmp20; + fftw_real tmp22; + ASSERT_ALIGNED_DOUBLE; + tmp21 = c_re(inout[4 * iostride]); + tmp23 = c_im(inout[4 * iostride]); + tmp20 = c_re(W[3]); + tmp22 = c_im(W[3]); + tmp24 = (tmp20 * tmp21) + (tmp22 * tmp23); + tmp193 = (tmp20 * tmp23) - (tmp22 * tmp21); + } + { + fftw_real tmp37; + fftw_real tmp39; + fftw_real tmp36; + fftw_real tmp38; + ASSERT_ALIGNED_DOUBLE; + tmp37 = c_re(inout[12 * iostride]); + tmp39 = c_im(inout[12 * iostride]); + tmp36 = c_re(W[11]); + tmp38 = c_im(W[11]); + tmp40 = (tmp36 * tmp37) + (tmp38 * tmp39); + tmp199 = (tmp36 * tmp39) - (tmp38 * tmp37); + } + { + fftw_real tmp26; + fftw_real tmp28; + fftw_real tmp25; + fftw_real tmp27; + ASSERT_ALIGNED_DOUBLE; + tmp26 = c_re(inout[20 * iostride]); + tmp28 = c_im(inout[20 * iostride]); + tmp25 = c_re(W[19]); + tmp27 = c_im(W[19]); + tmp29 = (tmp25 * tmp26) + (tmp27 * tmp28); + tmp194 = (tmp25 * tmp28) - (tmp27 * tmp26); + } + { + fftw_real tmp32; + fftw_real tmp34; + fftw_real tmp31; + fftw_real tmp33; + ASSERT_ALIGNED_DOUBLE; + tmp32 = c_re(inout[28 * iostride]); + tmp34 = c_im(inout[28 * iostride]); + tmp31 = c_re(W[27]); + tmp33 = c_im(W[27]); + tmp35 = (tmp31 * tmp32) + (tmp33 * tmp34); + tmp198 = (tmp31 * tmp34) - (tmp33 * tmp32); + } + { + fftw_real tmp30; + fftw_real tmp41; + fftw_real tmp197; + fftw_real tmp200; + ASSERT_ALIGNED_DOUBLE; + tmp30 = tmp24 + tmp29; + tmp41 = tmp35 + tmp40; + tmp42 = tmp30 + tmp41; + tmp455 = tmp30 - tmp41; + tmp197 = tmp35 - tmp40; + tmp200 = tmp198 - tmp199; + tmp201 = tmp197 + tmp200; + tmp304 = tmp200 - tmp197; + } + { + fftw_real tmp388; + fftw_real tmp389; + fftw_real tmp192; + fftw_real tmp195; + ASSERT_ALIGNED_DOUBLE; + tmp388 = tmp198 + tmp199; + tmp389 = tmp193 + tmp194; + tmp390 = tmp388 - tmp389; + tmp437 = tmp389 + tmp388; + tmp192 = tmp24 - tmp29; + tmp195 = tmp193 - tmp194; + tmp196 = tmp192 - tmp195; + tmp305 = tmp192 + tmp195; + } + } + { /* load inputs 3, 19, 27, 11 (scaled by the conjugates of W[2], W[18], W[26], W[10]); some combinations are additionally scaled by K707106781 */ + fftw_real tmp166; + 
fftw_real tmp261; + fftw_real tmp171; + fftw_real tmp262; + fftw_real tmp260; + fftw_real tmp263; + fftw_real tmp177; + fftw_real tmp266; + fftw_real tmp182; + fftw_real tmp267; + fftw_real tmp265; + fftw_real tmp268; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp163; + fftw_real tmp165; + fftw_real tmp162; + fftw_real tmp164; + ASSERT_ALIGNED_DOUBLE; + tmp163 = c_re(inout[3 * iostride]); + tmp165 = c_im(inout[3 * iostride]); + tmp162 = c_re(W[2]); + tmp164 = c_im(W[2]); + tmp166 = (tmp162 * tmp163) + (tmp164 * tmp165); + tmp261 = (tmp162 * tmp165) - (tmp164 * tmp163); + } + { + fftw_real tmp168; + fftw_real tmp170; + fftw_real tmp167; + fftw_real tmp169; + ASSERT_ALIGNED_DOUBLE; + tmp168 = c_re(inout[19 * iostride]); + tmp170 = c_im(inout[19 * iostride]); + tmp167 = c_re(W[18]); + tmp169 = c_im(W[18]); + tmp171 = (tmp167 * tmp168) + (tmp169 * tmp170); + tmp262 = (tmp167 * tmp170) - (tmp169 * tmp168); + } + tmp260 = tmp166 - tmp171; + tmp263 = tmp261 - tmp262; + { + fftw_real tmp174; + fftw_real tmp176; + fftw_real tmp173; + fftw_real tmp175; + ASSERT_ALIGNED_DOUBLE; + tmp174 = c_re(inout[27 * iostride]); + tmp176 = c_im(inout[27 * iostride]); + tmp173 = c_re(W[26]); + tmp175 = c_im(W[26]); + tmp177 = (tmp173 * tmp174) + (tmp175 * tmp176); + tmp266 = (tmp173 * tmp176) - (tmp175 * tmp174); + } + { + fftw_real tmp179; + fftw_real tmp181; + fftw_real tmp178; + fftw_real tmp180; + ASSERT_ALIGNED_DOUBLE; + tmp179 = c_re(inout[11 * iostride]); + tmp181 = c_im(inout[11 * iostride]); + tmp178 = c_re(W[10]); + tmp180 = c_im(W[10]); + tmp182 = (tmp178 * tmp179) + (tmp180 * tmp181); + tmp267 = (tmp178 * tmp181) - (tmp180 * tmp179); + } + tmp265 = tmp177 - tmp182; + tmp268 = tmp266 - tmp267; + { + fftw_real tmp172; + fftw_real tmp183; + fftw_real tmp373; + fftw_real tmp374; + ASSERT_ALIGNED_DOUBLE; + tmp172 = tmp166 + tmp171; + tmp183 = tmp177 + tmp182; + tmp184 = tmp172 + tmp183; + tmp401 = tmp172 - tmp183; + tmp373 = tmp261 + tmp262; + tmp374 = tmp266 + tmp267; + tmp375 = 
tmp373 + tmp374; + tmp404 = tmp374 - tmp373; + } + { + fftw_real tmp264; + fftw_real tmp269; + fftw_real tmp277; + fftw_real tmp278; + ASSERT_ALIGNED_DOUBLE; + tmp264 = tmp260 - tmp263; + tmp269 = tmp265 + tmp268; + tmp270 = K707106781 * (tmp264 + tmp269); + tmp317 = K707106781 * (tmp264 - tmp269); + tmp277 = tmp260 + tmp263; + tmp278 = tmp268 - tmp265; + tmp279 = K707106781 * (tmp277 + tmp278); + tmp320 = K707106781 * (tmp278 - tmp277); + } + } + { /* load inputs 2, 26, 18, 10 (scaled by the conjugates of W[1], W[25], W[17], W[9]) and form their sums and differences */ + fftw_real tmp48; + fftw_real tmp215; + fftw_real tmp64; + fftw_real tmp222; + fftw_real tmp53; + fftw_real tmp216; + fftw_real tmp59; + fftw_real tmp221; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp45; + fftw_real tmp47; + fftw_real tmp44; + fftw_real tmp46; + ASSERT_ALIGNED_DOUBLE; + tmp45 = c_re(inout[2 * iostride]); + tmp47 = c_im(inout[2 * iostride]); + tmp44 = c_re(W[1]); + tmp46 = c_im(W[1]); + tmp48 = (tmp44 * tmp45) + (tmp46 * tmp47); + tmp215 = (tmp44 * tmp47) - (tmp46 * tmp45); + } + { + fftw_real tmp61; + fftw_real tmp63; + fftw_real tmp60; + fftw_real tmp62; + ASSERT_ALIGNED_DOUBLE; + tmp61 = c_re(inout[26 * iostride]); + tmp63 = c_im(inout[26 * iostride]); + tmp60 = c_re(W[25]); + tmp62 = c_im(W[25]); + tmp64 = (tmp60 * tmp61) + (tmp62 * tmp63); + tmp222 = (tmp60 * tmp63) - (tmp62 * tmp61); + } + { + fftw_real tmp50; + fftw_real tmp52; + fftw_real tmp49; + fftw_real tmp51; + ASSERT_ALIGNED_DOUBLE; + tmp50 = c_re(inout[18 * iostride]); + tmp52 = c_im(inout[18 * iostride]); + tmp49 = c_re(W[17]); + tmp51 = c_im(W[17]); + tmp53 = (tmp49 * tmp50) + (tmp51 * tmp52); + tmp216 = (tmp49 * tmp52) - (tmp51 * tmp50); + } + { + fftw_real tmp56; + fftw_real tmp58; + fftw_real tmp55; + fftw_real tmp57; + ASSERT_ALIGNED_DOUBLE; + tmp56 = c_re(inout[10 * iostride]); + tmp58 = c_im(inout[10 * iostride]); + tmp55 = c_re(W[9]); + tmp57 = c_im(W[9]); + tmp59 = (tmp55 * tmp56) + (tmp57 * tmp58); + tmp221 = (tmp55 * tmp58) - (tmp57 * tmp56); + } + { + fftw_real tmp54; + fftw_real tmp65; + fftw_real tmp220; + fftw_real tmp223; 
+ ASSERT_ALIGNED_DOUBLE; + tmp54 = tmp48 + tmp53; + tmp65 = tmp59 + tmp64; + tmp66 = tmp54 + tmp65; + tmp395 = tmp54 - tmp65; + tmp220 = tmp48 - tmp53; + tmp223 = tmp221 - tmp222; + tmp224 = tmp220 - tmp223; + tmp312 = tmp220 + tmp223; + } + { + fftw_real tmp355; + fftw_real tmp356; + fftw_real tmp217; + fftw_real tmp218; + ASSERT_ALIGNED_DOUBLE; + tmp355 = tmp215 + tmp216; + tmp356 = tmp221 + tmp222; + tmp357 = tmp355 + tmp356; + tmp396 = tmp355 - tmp356; + tmp217 = tmp215 - tmp216; + tmp218 = tmp59 - tmp64; + tmp219 = tmp217 + tmp218; + tmp311 = tmp217 - tmp218; + } + } + { /* load inputs 1, 25, 17, 9 (scaled by the conjugates of W[0], W[24], W[16], W[8]) and form their sums and differences */ + fftw_real tmp96; + fftw_real tmp245; + fftw_real tmp112; + fftw_real tmp230; + fftw_real tmp101; + fftw_real tmp246; + fftw_real tmp107; + fftw_real tmp229; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp93; + fftw_real tmp95; + fftw_real tmp92; + fftw_real tmp94; + ASSERT_ALIGNED_DOUBLE; + tmp93 = c_re(inout[iostride]); + tmp95 = c_im(inout[iostride]); + tmp92 = c_re(W[0]); + tmp94 = c_im(W[0]); + tmp96 = (tmp92 * tmp93) + (tmp94 * tmp95); + tmp245 = (tmp92 * tmp95) - (tmp94 * tmp93); + } + { + fftw_real tmp109; + fftw_real tmp111; + fftw_real tmp108; + fftw_real tmp110; + ASSERT_ALIGNED_DOUBLE; + tmp109 = c_re(inout[25 * iostride]); + tmp111 = c_im(inout[25 * iostride]); + tmp108 = c_re(W[24]); + tmp110 = c_im(W[24]); + tmp112 = (tmp108 * tmp109) + (tmp110 * tmp111); + tmp230 = (tmp108 * tmp111) - (tmp110 * tmp109); + } + { + fftw_real tmp98; + fftw_real tmp100; + fftw_real tmp97; + fftw_real tmp99; + ASSERT_ALIGNED_DOUBLE; + tmp98 = c_re(inout[17 * iostride]); + tmp100 = c_im(inout[17 * iostride]); + tmp97 = c_re(W[16]); + tmp99 = c_im(W[16]); + tmp101 = (tmp97 * tmp98) + (tmp99 * tmp100); + tmp246 = (tmp97 * tmp100) - (tmp99 * tmp98); + } + { + fftw_real tmp104; + fftw_real tmp106; + fftw_real tmp103; + fftw_real tmp105; + ASSERT_ALIGNED_DOUBLE; + tmp104 = c_re(inout[9 * iostride]); + tmp106 = c_im(inout[9 * iostride]); + tmp103 = c_re(W[8]); + tmp105 = c_im(W[8]); + tmp107 = (tmp103 * 
tmp104) + (tmp105 * tmp106); + tmp229 = (tmp103 * tmp106) - (tmp105 * tmp104); + } + { + fftw_real tmp102; + fftw_real tmp113; + fftw_real tmp247; + fftw_real tmp248; + ASSERT_ALIGNED_DOUBLE; + tmp102 = tmp96 + tmp101; + tmp113 = tmp107 + tmp112; + tmp114 = tmp102 + tmp113; + tmp410 = tmp102 - tmp113; + tmp247 = tmp245 - tmp246; + tmp248 = tmp107 - tmp112; + tmp249 = tmp247 + tmp248; + tmp323 = tmp247 - tmp248; + } + { + fftw_real tmp361; + fftw_real tmp362; + fftw_real tmp228; + fftw_real tmp231; + ASSERT_ALIGNED_DOUBLE; + tmp361 = tmp245 + tmp246; + tmp362 = tmp229 + tmp230; + tmp363 = tmp361 + tmp362; + tmp407 = tmp361 - tmp362; + tmp228 = tmp96 - tmp101; + tmp231 = tmp229 - tmp230; + tmp232 = tmp228 - tmp231; + tmp326 = tmp228 + tmp231; + } + } + { /* load inputs 30, 22, 14, 6 (scaled by the conjugates of W[29], W[21], W[13], W[5]) and form their sums and differences */ + fftw_real tmp71; + fftw_real tmp204; + fftw_real tmp87; + fftw_real tmp211; + fftw_real tmp76; + fftw_real tmp205; + fftw_real tmp82; + fftw_real tmp210; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp68; + fftw_real tmp70; + fftw_real tmp67; + fftw_real tmp69; + ASSERT_ALIGNED_DOUBLE; + tmp68 = c_re(inout[30 * iostride]); + tmp70 = c_im(inout[30 * iostride]); + tmp67 = c_re(W[29]); + tmp69 = c_im(W[29]); + tmp71 = (tmp67 * tmp68) + (tmp69 * tmp70); + tmp204 = (tmp67 * tmp70) - (tmp69 * tmp68); + } + { + fftw_real tmp84; + fftw_real tmp86; + fftw_real tmp83; + fftw_real tmp85; + ASSERT_ALIGNED_DOUBLE; + tmp84 = c_re(inout[22 * iostride]); + tmp86 = c_im(inout[22 * iostride]); + tmp83 = c_re(W[21]); + tmp85 = c_im(W[21]); + tmp87 = (tmp83 * tmp84) + (tmp85 * tmp86); + tmp211 = (tmp83 * tmp86) - (tmp85 * tmp84); + } + { + fftw_real tmp73; + fftw_real tmp75; + fftw_real tmp72; + fftw_real tmp74; + ASSERT_ALIGNED_DOUBLE; + tmp73 = c_re(inout[14 * iostride]); + tmp75 = c_im(inout[14 * iostride]); + tmp72 = c_re(W[13]); + tmp74 = c_im(W[13]); + tmp76 = (tmp72 * tmp73) + (tmp74 * tmp75); + tmp205 = (tmp72 * tmp75) - (tmp74 * tmp73); + } + { + fftw_real tmp79; + fftw_real tmp81; + fftw_real tmp78; + fftw_real tmp80; + 
ASSERT_ALIGNED_DOUBLE; + tmp79 = c_re(inout[6 * iostride]); + tmp81 = c_im(inout[6 * iostride]); + tmp78 = c_re(W[5]); + tmp80 = c_im(W[5]); + tmp82 = (tmp78 * tmp79) + (tmp80 * tmp81); + tmp210 = (tmp78 * tmp81) - (tmp80 * tmp79); + } + { + fftw_real tmp77; + fftw_real tmp88; + fftw_real tmp209; + fftw_real tmp212; + ASSERT_ALIGNED_DOUBLE; + tmp77 = tmp71 + tmp76; + tmp88 = tmp82 + tmp87; + tmp89 = tmp77 + tmp88; + tmp393 = tmp77 - tmp88; + tmp209 = tmp71 - tmp76; + tmp212 = tmp210 - tmp211; + tmp213 = tmp209 - tmp212; + tmp309 = tmp209 + tmp212; + } + { + fftw_real tmp352; + fftw_real tmp353; + fftw_real tmp206; + fftw_real tmp207; + ASSERT_ALIGNED_DOUBLE; + tmp352 = tmp204 + tmp205; + tmp353 = tmp210 + tmp211; + tmp354 = tmp352 + tmp353; + tmp392 = tmp352 - tmp353; + tmp206 = tmp204 - tmp205; + tmp207 = tmp82 - tmp87; + tmp208 = tmp206 + tmp207; + tmp308 = tmp206 - tmp207; + } + } + { /* load inputs 5, 21, 29, 13 (scaled by the conjugates of W[4], W[20], W[28], W[12]); some combinations are additionally scaled by K707106781 */ + fftw_real tmp119; + fftw_real tmp234; + fftw_real tmp124; + fftw_real tmp235; + fftw_real tmp233; + fftw_real tmp236; + fftw_real tmp130; + fftw_real tmp239; + fftw_real tmp135; + fftw_real tmp240; + fftw_real tmp238; + fftw_real tmp241; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp116; + fftw_real tmp118; + fftw_real tmp115; + fftw_real tmp117; + ASSERT_ALIGNED_DOUBLE; + tmp116 = c_re(inout[5 * iostride]); + tmp118 = c_im(inout[5 * iostride]); + tmp115 = c_re(W[4]); + tmp117 = c_im(W[4]); + tmp119 = (tmp115 * tmp116) + (tmp117 * tmp118); + tmp234 = (tmp115 * tmp118) - (tmp117 * tmp116); + } + { + fftw_real tmp121; + fftw_real tmp123; + fftw_real tmp120; + fftw_real tmp122; + ASSERT_ALIGNED_DOUBLE; + tmp121 = c_re(inout[21 * iostride]); + tmp123 = c_im(inout[21 * iostride]); + tmp120 = c_re(W[20]); + tmp122 = c_im(W[20]); + tmp124 = (tmp120 * tmp121) + (tmp122 * tmp123); + tmp235 = (tmp120 * tmp123) - (tmp122 * tmp121); + } + tmp233 = tmp119 - tmp124; + tmp236 = tmp234 - tmp235; + { + fftw_real tmp127; + fftw_real tmp129; + fftw_real tmp126; + fftw_real tmp128; + 
ASSERT_ALIGNED_DOUBLE; + tmp127 = c_re(inout[29 * iostride]); + tmp129 = c_im(inout[29 * iostride]); + tmp126 = c_re(W[28]); + tmp128 = c_im(W[28]); + tmp130 = (tmp126 * tmp127) + (tmp128 * tmp129); + tmp239 = (tmp126 * tmp129) - (tmp128 * tmp127); + } + { + fftw_real tmp132; + fftw_real tmp134; + fftw_real tmp131; + fftw_real tmp133; + ASSERT_ALIGNED_DOUBLE; + tmp132 = c_re(inout[13 * iostride]); + tmp134 = c_im(inout[13 * iostride]); + tmp131 = c_re(W[12]); + tmp133 = c_im(W[12]); + tmp135 = (tmp131 * tmp132) + (tmp133 * tmp134); + tmp240 = (tmp131 * tmp134) - (tmp133 * tmp132); + } + tmp238 = tmp130 - tmp135; + tmp241 = tmp239 - tmp240; + { + fftw_real tmp125; + fftw_real tmp136; + fftw_real tmp364; + fftw_real tmp365; + ASSERT_ALIGNED_DOUBLE; + tmp125 = tmp119 + tmp124; + tmp136 = tmp130 + tmp135; + tmp137 = tmp125 + tmp136; + tmp408 = tmp125 - tmp136; + tmp364 = tmp234 + tmp235; + tmp365 = tmp239 + tmp240; + tmp366 = tmp364 + tmp365; + tmp411 = tmp365 - tmp364; + } + { + fftw_real tmp237; + fftw_real tmp242; + fftw_real tmp250; + fftw_real tmp251; + ASSERT_ALIGNED_DOUBLE; + tmp237 = tmp233 - tmp236; + tmp242 = tmp238 + tmp241; + tmp243 = K707106781 * (tmp237 + tmp242); + tmp324 = K707106781 * (tmp237 - tmp242); + tmp250 = tmp233 + tmp236; + tmp251 = tmp241 - tmp238; + tmp252 = K707106781 * (tmp250 + tmp251); + tmp327 = K707106781 * (tmp251 - tmp250); + } + } + { /* all inputs are in temporaries from here on; this block stores outputs 0, 8, 16 and 24 */ + fftw_real tmp91; + fftw_real tmp383; + fftw_real tmp444; + fftw_real tmp446; + fftw_real tmp186; + fftw_real tmp445; + fftw_real tmp386; + fftw_real tmp435; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp43; + fftw_real tmp90; + fftw_real tmp436; + fftw_real tmp443; + ASSERT_ALIGNED_DOUBLE; + tmp43 = tmp19 + tmp42; + tmp90 = tmp66 + tmp89; + tmp91 = tmp43 + tmp90; + tmp383 = tmp43 - tmp90; + tmp436 = tmp357 + tmp354; + tmp443 = tmp437 + tmp442; + tmp444 = tmp436 + tmp443; + tmp446 = tmp443 - tmp436; + } + { + fftw_real tmp138; + fftw_real tmp185; + fftw_real tmp384; + fftw_real tmp385; + 
ASSERT_ALIGNED_DOUBLE; + tmp138 = tmp114 + tmp137; + tmp185 = tmp161 + tmp184; + tmp186 = tmp138 + tmp185; + tmp445 = tmp138 - tmp185; + tmp384 = tmp372 + tmp375; + tmp385 = tmp363 + tmp366; + tmp386 = tmp384 - tmp385; + tmp435 = tmp385 + tmp384; + } + c_re(inout[16 * iostride]) = tmp91 - tmp186; + c_re(inout[0]) = tmp91 + tmp186; + c_re(inout[24 * iostride]) = tmp383 - tmp386; + c_re(inout[8 * iostride]) = tmp383 + tmp386; + c_im(inout[0]) = tmp435 + tmp444; + c_im(inout[16 * iostride]) = tmp444 - tmp435; + c_im(inout[8 * iostride]) = tmp445 + tmp446; + c_im(inout[24 * iostride]) = tmp446 - tmp445; + } + { /* stores outputs 4, 12, 20 and 28 */ + fftw_real tmp359; + fftw_real tmp379; + fftw_real tmp450; + fftw_real tmp452; + fftw_real tmp368; + fftw_real tmp381; + fftw_real tmp377; + fftw_real tmp380; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp351; + fftw_real tmp358; + fftw_real tmp448; + fftw_real tmp449; + ASSERT_ALIGNED_DOUBLE; + tmp351 = tmp19 - tmp42; + tmp358 = tmp354 - tmp357; + tmp359 = tmp351 + tmp358; + tmp379 = tmp351 - tmp358; + tmp448 = tmp66 - tmp89; + tmp449 = tmp442 - tmp437; + tmp450 = tmp448 + tmp449; + tmp452 = tmp449 - tmp448; + } + { + fftw_real tmp360; + fftw_real tmp367; + fftw_real tmp369; + fftw_real tmp376; + ASSERT_ALIGNED_DOUBLE; + tmp360 = tmp114 - tmp137; + tmp367 = tmp363 - tmp366; + tmp368 = tmp360 - tmp367; + tmp381 = tmp360 + tmp367; + tmp369 = tmp161 - tmp184; + tmp376 = tmp372 - tmp375; + tmp377 = tmp369 + tmp376; + tmp380 = tmp376 - tmp369; + } + { + fftw_real tmp378; + fftw_real tmp451; + fftw_real tmp382; + fftw_real tmp447; + ASSERT_ALIGNED_DOUBLE; + tmp378 = K707106781 * (tmp368 + tmp377); + c_re(inout[20 * iostride]) = tmp359 - tmp378; + c_re(inout[4 * iostride]) = tmp359 + tmp378; + tmp451 = K707106781 * (tmp368 - tmp377); + c_im(inout[12 * iostride]) = tmp451 + tmp452; + c_im(inout[28 * iostride]) = tmp452 - tmp451; + tmp382 = K707106781 * (tmp380 - tmp381); + c_re(inout[28 * iostride]) = tmp379 - tmp382; + c_re(inout[12 * iostride]) = tmp379 + tmp382; + 
tmp447 = K707106781 * (tmp381 + tmp380); + c_im(inout[4 * iostride]) = tmp447 + tmp450; + c_im(inout[20 * iostride]) = tmp450 - tmp447; + } + } + { /* stores outputs 2, 6, 10, 14, 18, 22, 26 and 30 */ + fftw_real tmp391; + fftw_real tmp419; + fftw_real tmp398; + fftw_real tmp454; + fftw_real tmp422; + fftw_real tmp462; + fftw_real tmp406; + fftw_real tmp417; + fftw_real tmp457; + fftw_real tmp463; + fftw_real tmp426; + fftw_real tmp433; + fftw_real tmp413; + fftw_real tmp416; + fftw_real tmp429; + fftw_real tmp432; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp394; + fftw_real tmp397; + fftw_real tmp424; + fftw_real tmp425; + ASSERT_ALIGNED_DOUBLE; + tmp391 = tmp387 - tmp390; + tmp419 = tmp387 + tmp390; + tmp394 = tmp392 - tmp393; + tmp397 = tmp395 + tmp396; + tmp398 = K707106781 * (tmp394 - tmp397); + tmp454 = K707106781 * (tmp397 + tmp394); + { + fftw_real tmp420; + fftw_real tmp421; + fftw_real tmp402; + fftw_real tmp405; + ASSERT_ALIGNED_DOUBLE; + tmp420 = tmp395 - tmp396; + tmp421 = tmp393 + tmp392; + tmp422 = K707106781 * (tmp420 + tmp421); + tmp462 = K707106781 * (tmp420 - tmp421); + tmp402 = tmp400 - tmp401; + tmp405 = tmp403 - tmp404; + tmp406 = + (K382683432 * tmp402) - (K923879532 * tmp405); + tmp417 = + (K923879532 * tmp402) + (K382683432 * tmp405); + } + tmp457 = tmp455 + tmp456; + tmp463 = tmp456 - tmp455; + tmp424 = tmp400 + tmp401; + tmp425 = tmp403 + tmp404; + tmp426 = (K923879532 * tmp424) - (K382683432 * tmp425); + tmp433 = (K382683432 * tmp424) + (K923879532 * tmp425); + { + fftw_real tmp409; + fftw_real tmp412; + fftw_real tmp427; + fftw_real tmp428; + ASSERT_ALIGNED_DOUBLE; + tmp409 = tmp407 - tmp408; + tmp412 = tmp410 - tmp411; + tmp413 = + (K382683432 * tmp409) + (K923879532 * tmp412); + tmp416 = + (K382683432 * tmp412) - (K923879532 * tmp409); + tmp427 = tmp407 + tmp408; + tmp428 = tmp410 + tmp411; + tmp429 = + (K923879532 * tmp427) + (K382683432 * tmp428); + tmp432 = + (K923879532 * tmp428) - (K382683432 * tmp427); + } + } + { + fftw_real tmp399; + fftw_real tmp414; + fftw_real tmp415; + 
fftw_real tmp418; + ASSERT_ALIGNED_DOUBLE; + tmp399 = tmp391 - tmp398; + tmp414 = tmp406 - tmp413; + c_re(inout[30 * iostride]) = tmp399 - tmp414; + c_re(inout[14 * iostride]) = tmp399 + tmp414; + tmp415 = tmp391 + tmp398; + tmp418 = tmp416 + tmp417; + c_re(inout[22 * iostride]) = tmp415 - tmp418; + c_re(inout[6 * iostride]) = tmp415 + tmp418; + } + { + fftw_real tmp465; + fftw_real tmp466; + fftw_real tmp461; + fftw_real tmp464; + ASSERT_ALIGNED_DOUBLE; + tmp465 = tmp416 - tmp417; + tmp466 = tmp463 - tmp462; + c_im(inout[14 * iostride]) = tmp465 + tmp466; + c_im(inout[30 * iostride]) = tmp466 - tmp465; + tmp461 = tmp413 + tmp406; + tmp464 = tmp462 + tmp463; + c_im(inout[6 * iostride]) = tmp461 + tmp464; + c_im(inout[22 * iostride]) = tmp464 - tmp461; + } + { + fftw_real tmp423; + fftw_real tmp430; + fftw_real tmp431; + fftw_real tmp434; + ASSERT_ALIGNED_DOUBLE; + tmp423 = tmp419 - tmp422; + tmp430 = tmp426 - tmp429; + c_re(inout[26 * iostride]) = tmp423 - tmp430; + c_re(inout[10 * iostride]) = tmp423 + tmp430; + tmp431 = tmp419 + tmp422; + tmp434 = tmp432 + tmp433; + c_re(inout[18 * iostride]) = tmp431 - tmp434; + c_re(inout[2 * iostride]) = tmp431 + tmp434; + } + { + fftw_real tmp459; + fftw_real tmp460; + fftw_real tmp453; + fftw_real tmp458; + ASSERT_ALIGNED_DOUBLE; + tmp459 = tmp432 - tmp433; + tmp460 = tmp457 - tmp454; + c_im(inout[10 * iostride]) = tmp459 + tmp460; + c_im(inout[26 * iostride]) = tmp460 - tmp459; + tmp453 = tmp429 + tmp426; + tmp458 = tmp454 + tmp457; + c_im(inout[2 * iostride]) = tmp453 + tmp458; + c_im(inout[18 * iostride]) = tmp458 - tmp453; + } + } + { /* stores outputs 3, 7, 11, 15, 19, 23, 27 and 31 */ + fftw_real tmp307; + fftw_real tmp335; + fftw_real tmp338; + fftw_real tmp492; + fftw_real tmp487; + fftw_real tmp493; + fftw_real tmp314; + fftw_real tmp484; + fftw_real tmp322; + fftw_real tmp333; + fftw_real tmp342; + fftw_real tmp349; + fftw_real tmp329; + fftw_real tmp332; + fftw_real tmp345; + fftw_real tmp348; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp306; + fftw_real tmp336; + 
fftw_real tmp337; + fftw_real tmp485; + fftw_real tmp310; + fftw_real tmp313; + ASSERT_ALIGNED_DOUBLE; + tmp306 = K707106781 * (tmp304 - tmp305); + tmp307 = tmp303 - tmp306; + tmp335 = tmp303 + tmp306; + tmp336 = (K382683432 * tmp312) - (K923879532 * tmp311); + tmp337 = (K923879532 * tmp308) + (K382683432 * tmp309); + tmp338 = tmp336 + tmp337; + tmp492 = tmp336 - tmp337; + tmp485 = K707106781 * (tmp196 - tmp201); + tmp487 = tmp485 + tmp486; + tmp493 = tmp486 - tmp485; + tmp310 = (K382683432 * tmp308) - (K923879532 * tmp309); + tmp313 = (K382683432 * tmp311) + (K923879532 * tmp312); + tmp314 = tmp310 - tmp313; + tmp484 = tmp313 + tmp310; + } + { + fftw_real tmp318; + fftw_real tmp321; + fftw_real tmp340; + fftw_real tmp341; + ASSERT_ALIGNED_DOUBLE; + tmp318 = tmp316 - tmp317; + tmp321 = tmp319 - tmp320; + tmp322 = (K195090322 * tmp318) - (K980785280 * tmp321); + tmp333 = (K980785280 * tmp318) + (K195090322 * tmp321); + tmp340 = tmp316 + tmp317; + tmp341 = tmp319 + tmp320; + tmp342 = (K831469612 * tmp340) - (K555570233 * tmp341); + tmp349 = (K555570233 * tmp340) + (K831469612 * tmp341); + } + { + fftw_real tmp325; + fftw_real tmp328; + fftw_real tmp343; + fftw_real tmp344; + ASSERT_ALIGNED_DOUBLE; + tmp325 = tmp323 - tmp324; + tmp328 = tmp326 - tmp327; + tmp329 = (K195090322 * tmp325) + (K980785280 * tmp328); + tmp332 = (K195090322 * tmp328) - (K980785280 * tmp325); + tmp343 = tmp323 + tmp324; + tmp344 = tmp326 + tmp327; + tmp345 = (K831469612 * tmp343) + (K555570233 * tmp344); + tmp348 = (K831469612 * tmp344) - (K555570233 * tmp343); + } + { + fftw_real tmp315; + fftw_real tmp330; + fftw_real tmp331; + fftw_real tmp334; + ASSERT_ALIGNED_DOUBLE; + tmp315 = tmp307 - tmp314; + tmp330 = tmp322 - tmp329; + c_re(inout[31 * iostride]) = tmp315 - tmp330; + c_re(inout[15 * iostride]) = tmp315 + tmp330; + tmp331 = tmp307 + tmp314; + tmp334 = tmp332 + tmp333; + c_re(inout[23 * iostride]) = tmp331 - tmp334; + c_re(inout[7 * iostride]) = tmp331 + tmp334; + } + { + fftw_real 
tmp495; + fftw_real tmp496; + fftw_real tmp491; + fftw_real tmp494; + ASSERT_ALIGNED_DOUBLE; + tmp495 = tmp332 - tmp333; + tmp496 = tmp493 - tmp492; + c_im(inout[15 * iostride]) = tmp495 + tmp496; + c_im(inout[31 * iostride]) = tmp496 - tmp495; + tmp491 = tmp329 + tmp322; + tmp494 = tmp492 + tmp493; + c_im(inout[7 * iostride]) = tmp491 + tmp494; + c_im(inout[23 * iostride]) = tmp494 - tmp491; + } + { + fftw_real tmp339; + fftw_real tmp346; + fftw_real tmp347; + fftw_real tmp350; + ASSERT_ALIGNED_DOUBLE; + tmp339 = tmp335 - tmp338; + tmp346 = tmp342 - tmp345; + c_re(inout[27 * iostride]) = tmp339 - tmp346; + c_re(inout[11 * iostride]) = tmp339 + tmp346; + tmp347 = tmp335 + tmp338; + tmp350 = tmp348 + tmp349; + c_re(inout[19 * iostride]) = tmp347 - tmp350; + c_re(inout[3 * iostride]) = tmp347 + tmp350; + } + { + fftw_real tmp489; + fftw_real tmp490; + fftw_real tmp483; + fftw_real tmp488; + ASSERT_ALIGNED_DOUBLE; + tmp489 = tmp348 - tmp349; + tmp490 = tmp487 - tmp484; + c_im(inout[11 * iostride]) = tmp489 + tmp490; + c_im(inout[27 * iostride]) = tmp490 - tmp489; + tmp483 = tmp345 + tmp342; + tmp488 = tmp484 + tmp487; + c_im(inout[3 * iostride]) = tmp483 + tmp488; + c_im(inout[19 * iostride]) = tmp488 - tmp483; + } + } + { /* stores outputs 1, 5, 9, 13, 17, 21, 25 and 29 */ + fftw_real tmp203; + fftw_real tmp287; + fftw_real tmp290; + fftw_real tmp478; + fftw_real tmp473; + fftw_real tmp479; + fftw_real tmp226; + fftw_real tmp468; + fftw_real tmp254; + fftw_real tmp285; + fftw_real tmp294; + fftw_real tmp301; + fftw_real tmp281; + fftw_real tmp284; + fftw_real tmp297; + fftw_real tmp300; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp202; + fftw_real tmp288; + fftw_real tmp289; + fftw_real tmp469; + fftw_real tmp214; + fftw_real tmp225; + ASSERT_ALIGNED_DOUBLE; + tmp202 = K707106781 * (tmp196 + tmp201); + tmp203 = tmp191 - tmp202; + tmp287 = tmp191 + tmp202; + tmp288 = (K923879532 * tmp224) - (K382683432 * tmp219); + tmp289 = (K382683432 * tmp208) + (K923879532 * tmp213); + tmp290 = tmp288 + tmp289; + tmp478 = tmp288 - 
tmp289; + tmp469 = K707106781 * (tmp305 + tmp304); + tmp473 = tmp469 + tmp472; + tmp479 = tmp472 - tmp469; + tmp214 = (K923879532 * tmp208) - (K382683432 * tmp213); + tmp225 = (K923879532 * tmp219) + (K382683432 * tmp224); + tmp226 = tmp214 - tmp225; + tmp468 = tmp225 + tmp214; + } + { + fftw_real tmp244; + fftw_real tmp253; + fftw_real tmp292; + fftw_real tmp293; + ASSERT_ALIGNED_DOUBLE; + tmp244 = tmp232 - tmp243; + tmp253 = tmp249 - tmp252; + tmp254 = (K555570233 * tmp244) - (K831469612 * tmp253); + tmp285 = (K831469612 * tmp244) + (K555570233 * tmp253); + tmp292 = tmp232 + tmp243; + tmp293 = tmp249 + tmp252; + tmp294 = (K980785280 * tmp292) - (K195090322 * tmp293); + tmp301 = (K195090322 * tmp292) + (K980785280 * tmp293); + } + { + fftw_real tmp271; + fftw_real tmp280; + fftw_real tmp295; + fftw_real tmp296; + ASSERT_ALIGNED_DOUBLE; + tmp271 = tmp259 - tmp270; + tmp280 = tmp276 - tmp279; + tmp281 = (K555570233 * tmp271) + (K831469612 * tmp280); + tmp284 = (K555570233 * tmp280) - (K831469612 * tmp271); + tmp295 = tmp259 + tmp270; + tmp296 = tmp276 + tmp279; + tmp297 = (K980785280 * tmp295) + (K195090322 * tmp296); + tmp300 = (K980785280 * tmp296) - (K195090322 * tmp295); + } + { + fftw_real tmp227; + fftw_real tmp282; + fftw_real tmp283; + fftw_real tmp286; + ASSERT_ALIGNED_DOUBLE; + tmp227 = tmp203 + tmp226; + tmp282 = tmp254 + tmp281; + c_re(inout[21 * iostride]) = tmp227 - tmp282; + c_re(inout[5 * iostride]) = tmp227 + tmp282; + tmp283 = tmp203 - tmp226; + tmp286 = tmp284 - tmp285; + c_re(inout[29 * iostride]) = tmp283 - tmp286; + c_re(inout[13 * iostride]) = tmp283 + tmp286; + } + { + fftw_real tmp477; + fftw_real tmp480; + fftw_real tmp481; + fftw_real tmp482; + ASSERT_ALIGNED_DOUBLE; + tmp477 = tmp285 + tmp284; + tmp480 = tmp478 + tmp479; + c_im(inout[5 * iostride]) = tmp477 + tmp480; + c_im(inout[21 * iostride]) = tmp480 - tmp477; + tmp481 = tmp254 - tmp281; + tmp482 = tmp479 - tmp478; + c_im(inout[13 * iostride]) = tmp481 + tmp482; + c_im(inout[29 * 
iostride]) = tmp482 - tmp481; + } + { + fftw_real tmp291; + fftw_real tmp298; + fftw_real tmp299; + fftw_real tmp302; + ASSERT_ALIGNED_DOUBLE; + tmp291 = tmp287 + tmp290; + tmp298 = tmp294 + tmp297; + c_re(inout[17 * iostride]) = tmp291 - tmp298; + c_re(inout[iostride]) = tmp291 + tmp298; + tmp299 = tmp287 - tmp290; + tmp302 = tmp300 - tmp301; + c_re(inout[25 * iostride]) = tmp299 - tmp302; + c_re(inout[9 * iostride]) = tmp299 + tmp302; + } + { + fftw_real tmp467; + fftw_real tmp474; + fftw_real tmp475; + fftw_real tmp476; + ASSERT_ALIGNED_DOUBLE; + tmp467 = tmp301 + tmp300; + tmp474 = tmp468 + tmp473; + c_im(inout[iostride]) = tmp467 + tmp474; + c_im(inout[17 * iostride]) = tmp474 - tmp467; + tmp475 = tmp294 - tmp297; + tmp476 = tmp473 - tmp468; + c_im(inout[9 * iostride]) = tmp475 + tmp476; + c_im(inout[25 * iostride]) = tmp476 - tmp475; + } + } + } +} + +/* Descriptor for the codelet above. Field meanings are presumed from the values (name, function pointer, size 32, direction, codelet type, signature 715, twiddle count 31, twiddle order) -- confirm against fftw_codelet_desc in fftw-int.h. */ static const int twiddle_order[] = + { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, +20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 }; +fftw_codelet_desc fftwi_twiddle_32_desc = { + "fftwi_twiddle_32", + (void (*)()) fftwi_twiddle_32, + 32, + FFTW_BACKWARD, + FFTW_TWIDDLE, + 715, + 31, + twiddle_order, +}; diff --git a/src/fftw/ftwi_4.c b/src/fftw/ftwi_4.c new file mode 100644 index 0000000..ee5eedb --- /dev/null +++ b/src/fftw/ftwi_4.c @@ -0,0 +1,141 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Mon Mar 24 02:08:25 EST 2003 */ + +#include "fftw-int.h" +#include "fftw.h" + +/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -twiddleinv 4 */ + +/* + * This function contains 22 FP additions, 12 FP multiplications, + * (or, 16 additions, 6 multiplications, 6 fused multiply/add), + * 14 stack variables, and 16 memory accesses + */ + +/* + * Generator Id's : + * $Id: ftwi_4.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: ftwi_4.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: ftwi_4.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + */ +/* fftwi_twiddle_4: in-place 4-point backward (FFTW_BACKWARD) twiddle pass. A is read and overwritten; W is only read and supplies 3 twiddle factors per pass; iostride is the element spacing between the 4 points of one transform; m is the number of passes (nothing happens when m <= 0); dist is the element spacing between the first points of successive passes. Per pass, with x0..x3 = inout[0], inout[iostride], inout[2*iostride], inout[3*iostride] and yk = xk * conj(W[k-1]) for k = 1..3, the code stores out0 = x0+y1+y2+y3, out1 = (x0-y2) + i*(y1-y3), out2 = (x0+y2) - (y1+y3), out3 = (x0-y2) - i*(y1-y3) back into the same four slots. All loads precede the first store. No pointer or bounds checks are made. NOTE(review): assumes c_re/c_im select the real/imaginary member and that ASSERT_ALIGNED_DOUBLE is the alignment check requested by -magic-alignment-check; both come from the included headers - confirm there. */ +void fftwi_twiddle_4(fftw_complex *A, const fftw_complex *W, int iostride, + int m, int dist) +{ + int i; + fftw_complex *inout; + inout = A; + for (i = m; i > 0; i = i - 1, inout = inout + dist, W = W + 3) { /* i only counts passes; inout and W are the cursors that move */ + fftw_real tmp1; + fftw_real tmp25; + fftw_real tmp6; + fftw_real tmp24; + fftw_real tmp12; + fftw_real tmp20; + fftw_real tmp17; + fftw_real tmp21; + ASSERT_ALIGNED_DOUBLE; + tmp1 = c_re(inout[0]); + tmp25 = c_im(inout[0]); /* point 0 is used as-is, without a twiddle multiply */ + { + fftw_real tmp3; + fftw_real tmp5; + fftw_real tmp2; + fftw_real tmp4; + ASSERT_ALIGNED_DOUBLE; + tmp3 = c_re(inout[2 * iostride]); + tmp5 = c_im(inout[2 * iostride]); + tmp2 = c_re(W[1]); + tmp4 = c_im(W[1]); + tmp6 = (tmp2 * tmp3) + (tmp4 * tmp5); /* Re(x2 * conj(W[1])) */ + tmp24 = (tmp2 * tmp5) - (tmp4 * tmp3); /* Im(x2 * conj(W[1])) */ + } + { + fftw_real tmp9; + fftw_real tmp11; + fftw_real tmp8; + fftw_real tmp10; + ASSERT_ALIGNED_DOUBLE; + tmp9 = c_re(inout[iostride]); + tmp11 = c_im(inout[iostride]); + tmp8 = c_re(W[0]); + tmp10 = c_im(W[0]); + tmp12 = (tmp8 * tmp9) + (tmp10 * tmp11); /* Re(x1 * conj(W[0])) */ + tmp20 = (tmp8 * tmp11) - (tmp10 * tmp9); /* Im(x1 * conj(W[0])) */ + } + { + fftw_real tmp14; + fftw_real
tmp16; + fftw_real tmp13; + fftw_real tmp15; + ASSERT_ALIGNED_DOUBLE; + tmp14 = c_re(inout[3 * iostride]); + tmp16 = c_im(inout[3 * iostride]); + tmp13 = c_re(W[2]); + tmp15 = c_im(W[2]); + tmp17 = (tmp13 * tmp14) + (tmp15 * tmp16); /* Re(x3 * conj(W[2])) */ + tmp21 = (tmp13 * tmp16) - (tmp15 * tmp14); /* Im(x3 * conj(W[2])) */ + } + { /* stores Re(out2), Re(out0), Im(out1), Im(out3) */ + fftw_real tmp7; + fftw_real tmp18; + fftw_real tmp27; + fftw_real tmp28; + ASSERT_ALIGNED_DOUBLE; + tmp7 = tmp1 + tmp6; + tmp18 = tmp12 + tmp17; + c_re(inout[2 * iostride]) = tmp7 - tmp18; + c_re(inout[0]) = tmp7 + tmp18; + tmp27 = tmp12 - tmp17; + tmp28 = tmp25 - tmp24; + c_im(inout[iostride]) = tmp27 + tmp28; + c_im(inout[3 * iostride]) = tmp28 - tmp27; + } + { /* stores Im(out0), Im(out2), Re(out1), Re(out3) */ + fftw_real tmp23; + fftw_real tmp26; + fftw_real tmp19; + fftw_real tmp22; + ASSERT_ALIGNED_DOUBLE; + tmp23 = tmp20 + tmp21; + tmp26 = tmp24 + tmp25; + c_im(inout[0]) = tmp23 + tmp26; + c_im(inout[2 * iostride]) = tmp26 - tmp23; + tmp19 = tmp1 - tmp6; + tmp22 = tmp20 - tmp21; + c_re(inout[iostride]) = tmp19 - tmp22; + c_re(inout[3 * iostride]) = tmp19 + tmp22; + } + } +} +/* Registration record for fftwi_twiddle_4. twiddle_order holds the indices 1..3; the descriptor stores the name string, the entry point cast to void (*)(), the size 4, FFTW_BACKWARD, FFTW_TWIDDLE, the integer 99 and the count 3 (equal to the number of twiddle_order entries), then the table itself. NOTE(review): 99 is presumably the codelet signature/id field of fftw_codelet_desc - confirm in fftw-int.h. */ +static const int twiddle_order[] = { 1, 2, 3 }; +fftw_codelet_desc fftwi_twiddle_4_desc = { + "fftwi_twiddle_4", + (void (*)()) fftwi_twiddle_4, + 4, + FFTW_BACKWARD, + FFTW_TWIDDLE, + 99, + 3, + twiddle_order, +}; diff --git a/src/fftw/ftwi_5.c b/src/fftw/ftwi_5.c new file mode 100644 index 0000000..c27b152 --- /dev/null +++ b/src/fftw/ftwi_5.c @@ -0,0 +1,197 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Mon Mar 24 02:08:25 EST 2003 */ + +#include "fftw-int.h" +#include "fftw.h" + +/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -twiddleinv 5 */ + +/* + * This function contains 40 FP additions, 28 FP multiplications, + * (or, 26 additions, 14 multiplications, 14 fused multiply/add), + * 26 stack variables, and 20 memory accesses + */ +static const fftw_real K559016994 = +FFTW_KONST(+0.559016994374947424102293417182819058860154590); /* value of sqrt(5)/4 */ +static const fftw_real K250000000 = +FFTW_KONST(+0.250000000000000000000000000000000000000000000); /* 1/4 */ +static const fftw_real K951056516 = +FFTW_KONST(+0.951056516295153572116439333379382143405698634); /* value of sin(2*pi/5) */ +static const fftw_real K587785252 = +FFTW_KONST(+0.587785252292473129168705954639072768597652438); /* value of sin(pi/5) */ + +/* + * Generator Id's : + * $Id: ftwi_5.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: ftwi_5.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: ftwi_5.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + */ +/* fftwi_twiddle_5: in-place 5-point backward (FFTW_BACKWARD) twiddle pass. A is read and overwritten; W is only read and supplies 4 twiddle factors per pass; iostride is the element spacing between the 5 points of one transform; m is the number of passes (nothing happens when m <= 0); dist is the element spacing between the first points of successive passes. Per pass, with xk = inout[k*iostride] and yk = xk * conj(W[k-1]) for k = 1..4, slot 0 receives x0+y1+y2+y3+y4 and slots 1..4 receive combinations of x0 with the sums and differences of the pairs (y1, y4) and (y2, y3), weighted by the four K constants defined at file scope. All loads precede the first store, and results go back into the slots they were read from. No pointer or bounds checks are made. NOTE(review): assumes c_re/c_im select the real/imaginary member and that ASSERT_ALIGNED_DOUBLE is the alignment check requested by -magic-alignment-check; both come from the included headers - confirm there. */ +void fftwi_twiddle_5(fftw_complex *A, const fftw_complex *W, int iostride, + int m, int dist) +{ + int i; + fftw_complex *inout; + inout = A; + for (i = m; i > 0; i = i - 1, inout = inout + dist, W = W + 4) { /* i only counts passes; inout and W are the cursors that move */ + fftw_real tmp1; + fftw_real tmp40; + fftw_real tmp30; + fftw_real tmp33; + fftw_real tmp37; + fftw_real tmp38; + fftw_real tmp39; + fftw_real tmp42; + fftw_real tmp41; + fftw_real tmp12; + fftw_real tmp23; + fftw_real tmp24; + ASSERT_ALIGNED_DOUBLE; + tmp1 = c_re(inout[0]); + tmp40 = c_im(inout[0]); /* point 0 is used as-is, without a twiddle multiply */ + { + fftw_real tmp6; + fftw_real tmp28; + fftw_real tmp22; + fftw_real tmp32; + fftw_real tmp11; + fftw_real tmp29; + fftw_real tmp17; + fftw_real tmp31; +
ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp3; + fftw_real tmp5; + fftw_real tmp2; + fftw_real tmp4; + ASSERT_ALIGNED_DOUBLE; + tmp3 = c_re(inout[iostride]); + tmp5 = c_im(inout[iostride]); + tmp2 = c_re(W[0]); + tmp4 = c_im(W[0]); + tmp6 = (tmp2 * tmp3) + (tmp4 * tmp5); /* Re(y1) = Re(x1 * conj(W[0])) */ + tmp28 = (tmp2 * tmp5) - (tmp4 * tmp3); /* Im(y1) */ + } + { + fftw_real tmp19; + fftw_real tmp21; + fftw_real tmp18; + fftw_real tmp20; + ASSERT_ALIGNED_DOUBLE; + tmp19 = c_re(inout[3 * iostride]); + tmp21 = c_im(inout[3 * iostride]); + tmp18 = c_re(W[2]); + tmp20 = c_im(W[2]); + tmp22 = (tmp18 * tmp19) + (tmp20 * tmp21); /* Re(y3) = Re(x3 * conj(W[2])) */ + tmp32 = (tmp18 * tmp21) - (tmp20 * tmp19); /* Im(y3) */ + } + { + fftw_real tmp8; + fftw_real tmp10; + fftw_real tmp7; + fftw_real tmp9; + ASSERT_ALIGNED_DOUBLE; + tmp8 = c_re(inout[4 * iostride]); + tmp10 = c_im(inout[4 * iostride]); + tmp7 = c_re(W[3]); + tmp9 = c_im(W[3]); + tmp11 = (tmp7 * tmp8) + (tmp9 * tmp10); /* Re(y4) = Re(x4 * conj(W[3])) */ + tmp29 = (tmp7 * tmp10) - (tmp9 * tmp8); /* Im(y4) */ + } + { + fftw_real tmp14; + fftw_real tmp16; + fftw_real tmp13; + fftw_real tmp15; + ASSERT_ALIGNED_DOUBLE; + tmp14 = c_re(inout[2 * iostride]); + tmp16 = c_im(inout[2 * iostride]); + tmp13 = c_re(W[1]); + tmp15 = c_im(W[1]); + tmp17 = (tmp13 * tmp14) + (tmp15 * tmp16); /* Re(y2) = Re(x2 * conj(W[1])) */ + tmp31 = (tmp13 * tmp16) - (tmp15 * tmp14); /* Im(y2) */ + } + tmp30 = tmp28 - tmp29; /* Im(y1 - y4) */ + tmp33 = tmp31 - tmp32; /* Im(y2 - y3) */ + tmp37 = tmp28 + tmp29; /* Im(y1 + y4) */ + tmp38 = tmp31 + tmp32; /* Im(y2 + y3) */ + tmp39 = tmp37 + tmp38; /* Im(y1 + y2 + y3 + y4) */ + tmp42 = tmp17 - tmp22; /* Re(y2 - y3) */ + tmp41 = tmp6 - tmp11; /* Re(y1 - y4) */ + tmp12 = tmp6 + tmp11; /* Re(y1 + y4) */ + tmp23 = tmp17 + tmp22; /* Re(y2 + y3) */ + tmp24 = tmp12 + tmp23; /* Re(y1 + y2 + y3 + y4) */ + } + c_re(inout[0]) = tmp1 + tmp24; + { /* real parts of slots 1..4 */ + fftw_real tmp34; + fftw_real tmp36; + fftw_real tmp27; + fftw_real tmp35; + fftw_real tmp25; + fftw_real tmp26; + ASSERT_ALIGNED_DOUBLE; + tmp34 = (K587785252 * tmp30) - (K951056516 * tmp33); + tmp36 = (K951056516 * tmp30) + (K587785252 * tmp33); + tmp25 = tmp1 - (K250000000 * tmp24); + tmp26 = K559016994 * (tmp12 - tmp23); + tmp27 = tmp25 - tmp26; + tmp35 = tmp26 + tmp25; + c_re(inout[2 * iostride]) = tmp27 - tmp34; + c_re(inout[3 * iostride]) =
tmp27 + tmp34; + c_re(inout[iostride]) = tmp35 - tmp36; + c_re(inout[4 * iostride]) = tmp35 + tmp36; + } + c_im(inout[0]) = tmp39 + tmp40; + { /* imaginary parts of slots 1..4 */ + fftw_real tmp43; + fftw_real tmp47; + fftw_real tmp46; + fftw_real tmp48; + fftw_real tmp44; + fftw_real tmp45; + ASSERT_ALIGNED_DOUBLE; + tmp43 = (K951056516 * tmp41) + (K587785252 * tmp42); + tmp47 = (K587785252 * tmp41) - (K951056516 * tmp42); + tmp44 = K559016994 * (tmp37 - tmp38); + tmp45 = tmp40 - (K250000000 * tmp39); + tmp46 = tmp44 + tmp45; + tmp48 = tmp45 - tmp44; + c_im(inout[iostride]) = tmp43 + tmp46; + c_im(inout[4 * iostride]) = tmp46 - tmp43; + c_im(inout[2 * iostride]) = tmp47 + tmp48; + c_im(inout[3 * iostride]) = tmp48 - tmp47; + } + } +} +/* Registration record for fftwi_twiddle_5. twiddle_order holds the indices 1..4; the descriptor stores the name string, the entry point cast to void (*)(), the size 5, FFTW_BACKWARD, FFTW_TWIDDLE, the integer 121 and the count 4 (equal to the number of twiddle_order entries), then the table itself. NOTE(review): 121 is presumably the codelet signature/id field of fftw_codelet_desc - confirm in fftw-int.h. */ +static const int twiddle_order[] = { 1, 2, 3, 4 }; +fftw_codelet_desc fftwi_twiddle_5_desc = { + "fftwi_twiddle_5", + (void (*)()) fftwi_twiddle_5, + 5, + FFTW_BACKWARD, + FFTW_TWIDDLE, + 121, + 4, + twiddle_order, +}; diff --git a/src/fftw/ftwi_6.c b/src/fftw/ftwi_6.c new file mode 100644 index 0000000..c53b117 --- /dev/null +++ b/src/fftw/ftwi_6.c @@ -0,0 +1,220 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Mon Mar 24 02:08:26 EST 2003 */ + +#include "fftw-int.h" +#include "fftw.h" + +/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -twiddleinv 6 */ + +/* + * This function contains 46 FP additions, 28 FP multiplications, + * (or, 32 additions, 14 multiplications, 14 fused multiply/add), + * 22 stack variables, and 24 memory accesses + */ +static const fftw_real K500000000 = +FFTW_KONST(+0.500000000000000000000000000000000000000000000); /* 1/2 */ +static const fftw_real K866025403 = +FFTW_KONST(+0.866025403784438646763723170752936183471402627); /* value of sqrt(3)/2 */ + +/* + * Generator Id's : + * $Id: ftwi_6.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: ftwi_6.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: ftwi_6.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + */ +/* fftwi_twiddle_6: in-place 6-point backward (FFTW_BACKWARD) twiddle pass. A is read and overwritten; W is only read and supplies 5 twiddle factors per pass; iostride is the element spacing between the 6 points of one transform; m is the number of passes (nothing happens when m <= 0); dist is the element spacing between the first points of successive passes. Per pass, with xk = inout[k*iostride] and yk = xk * conj(W[k-1]) for k = 1..5, slot 0 receives x0+y1+y2+y3+y4+y5, slot 3 receives x0-y1+y2-y3+y4-y5, and the other four slots receive combinations of the pair sums and differences (x0, y3), (y4, y1), (y2, y5) weighted by K500000000 and K866025403. All loads precede the first store, and results go back into the slots they were read from. No pointer or bounds checks are made. NOTE(review): assumes c_re/c_im select the real/imaginary member and that ASSERT_ALIGNED_DOUBLE is the alignment check requested by -magic-alignment-check; both come from the included headers - confirm there. */ +void fftwi_twiddle_6(fftw_complex *A, const fftw_complex *W, int iostride, + int m, int dist) +{ + int i; + fftw_complex *inout; + inout = A; + for (i = m; i > 0; i = i - 1, inout = inout + dist, W = W + 5) { /* i only counts passes; inout and W are the cursors that move */ + fftw_real tmp7; + fftw_real tmp31; + fftw_real tmp50; + fftw_real tmp54; + fftw_real tmp29; + fftw_real tmp33; + fftw_real tmp38; + fftw_real tmp44; + fftw_real tmp18; + fftw_real tmp32; + fftw_real tmp41; + fftw_real tmp45; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp1; + fftw_real tmp49; + fftw_real tmp6; + fftw_real tmp48; + ASSERT_ALIGNED_DOUBLE; + tmp1 = c_re(inout[0]); + tmp49 = c_im(inout[0]); /* point 0 is used as-is, without a twiddle multiply */ + { + fftw_real tmp3; + fftw_real tmp5; + fftw_real tmp2; + fftw_real tmp4; + ASSERT_ALIGNED_DOUBLE; + tmp3 = c_re(inout[3 * iostride]); + tmp5 = c_im(inout[3 * iostride]); + tmp2 = c_re(W[2]); + tmp4 = c_im(W[2]); + tmp6 = (tmp2 * tmp3) + (tmp4 * tmp5); /* Re(y3) = Re(x3 * conj(W[2])) */ +
tmp48 = (tmp2 * tmp5) - (tmp4 * tmp3); /* Im(y3) */ + } + tmp7 = tmp1 - tmp6; /* Re(x0 - y3) */ + tmp31 = tmp1 + tmp6; /* Re(x0 + y3) */ + tmp50 = tmp48 + tmp49; /* Im(x0 + y3) */ + tmp54 = tmp49 - tmp48; /* Im(x0 - y3) */ + } + { + fftw_real tmp23; + fftw_real tmp36; + fftw_real tmp28; + fftw_real tmp37; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp20; + fftw_real tmp22; + fftw_real tmp19; + fftw_real tmp21; + ASSERT_ALIGNED_DOUBLE; + tmp20 = c_re(inout[4 * iostride]); + tmp22 = c_im(inout[4 * iostride]); + tmp19 = c_re(W[3]); + tmp21 = c_im(W[3]); + tmp23 = (tmp19 * tmp20) + (tmp21 * tmp22); /* Re(y4) = Re(x4 * conj(W[3])) */ + tmp36 = (tmp19 * tmp22) - (tmp21 * tmp20); /* Im(y4) */ + } + { + fftw_real tmp25; + fftw_real tmp27; + fftw_real tmp24; + fftw_real tmp26; + ASSERT_ALIGNED_DOUBLE; + tmp25 = c_re(inout[iostride]); + tmp27 = c_im(inout[iostride]); + tmp24 = c_re(W[0]); + tmp26 = c_im(W[0]); + tmp28 = (tmp24 * tmp25) + (tmp26 * tmp27); /* Re(y1) = Re(x1 * conj(W[0])) */ + tmp37 = (tmp24 * tmp27) - (tmp26 * tmp25); /* Im(y1) */ + } + tmp29 = tmp23 - tmp28; /* Re(y4 - y1) */ + tmp33 = tmp23 + tmp28; /* Re(y4 + y1) */ + tmp38 = tmp36 - tmp37; /* Im(y4 - y1) */ + tmp44 = tmp36 + tmp37; /* Im(y4 + y1) */ + } + { + fftw_real tmp12; + fftw_real tmp39; + fftw_real tmp17; + fftw_real tmp40; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp9; + fftw_real tmp11; + fftw_real tmp8; + fftw_real tmp10; + ASSERT_ALIGNED_DOUBLE; + tmp9 = c_re(inout[2 * iostride]); + tmp11 = c_im(inout[2 * iostride]); + tmp8 = c_re(W[1]); + tmp10 = c_im(W[1]); + tmp12 = (tmp8 * tmp9) + (tmp10 * tmp11); /* Re(y2) = Re(x2 * conj(W[1])) */ + tmp39 = (tmp8 * tmp11) - (tmp10 * tmp9); /* Im(y2) */ + } + { + fftw_real tmp14; + fftw_real tmp16; + fftw_real tmp13; + fftw_real tmp15; + ASSERT_ALIGNED_DOUBLE; + tmp14 = c_re(inout[5 * iostride]); + tmp16 = c_im(inout[5 * iostride]); + tmp13 = c_re(W[4]); + tmp15 = c_im(W[4]); + tmp17 = (tmp13 * tmp14) + (tmp15 * tmp16); /* Re(y5) = Re(x5 * conj(W[4])) */ + tmp40 = (tmp13 * tmp16) - (tmp15 * tmp14); /* Im(y5) */ + } + tmp18 = tmp12 - tmp17; /* Re(y2 - y5) */ + tmp32 = tmp12 + tmp17; /* Re(y2 + y5) */ + tmp41 = tmp39 - tmp40; /* Im(y2 - y5) */ + tmp45 = tmp39 + tmp40; /* Im(y2 + y5) */ + } + { /* odd slots 1, 3 and 5, built from the pair differences */ + fftw_real tmp42; + fftw_real tmp30; + fftw_real tmp35; + fftw_real tmp53; + fftw_real tmp55; + fftw_real tmp56; + ASSERT_ALIGNED_DOUBLE; + tmp42 = K866025403 * (tmp38 - tmp41); +
tmp30 = tmp18 + tmp29; + tmp35 = tmp7 - (K500000000 * tmp30); + c_re(inout[3 * iostride]) = tmp7 + tmp30; + c_re(inout[iostride]) = tmp35 + tmp42; + c_re(inout[5 * iostride]) = tmp35 - tmp42; + tmp53 = K866025403 * (tmp18 - tmp29); + tmp55 = tmp41 + tmp38; + tmp56 = tmp54 - (K500000000 * tmp55); + c_im(inout[iostride]) = tmp53 + tmp56; + c_im(inout[5 * iostride]) = tmp56 - tmp53; + c_im(inout[3 * iostride]) = tmp55 + tmp54; + } + { /* even slots 0, 2 and 4, built from the pair sums */ + fftw_real tmp46; + fftw_real tmp34; + fftw_real tmp43; + fftw_real tmp52; + fftw_real tmp47; + fftw_real tmp51; + ASSERT_ALIGNED_DOUBLE; + tmp46 = K866025403 * (tmp44 - tmp45); + tmp34 = tmp32 + tmp33; + tmp43 = tmp31 - (K500000000 * tmp34); + c_re(inout[0]) = tmp31 + tmp34; + c_re(inout[4 * iostride]) = tmp43 + tmp46; + c_re(inout[2 * iostride]) = tmp43 - tmp46; + tmp52 = K866025403 * (tmp32 - tmp33); + tmp47 = tmp45 + tmp44; + tmp51 = tmp50 - (K500000000 * tmp47); + c_im(inout[0]) = tmp47 + tmp50; + c_im(inout[4 * iostride]) = tmp52 + tmp51; + c_im(inout[2 * iostride]) = tmp51 - tmp52; + } + } +} +/* Registration record for fftwi_twiddle_6. twiddle_order holds the indices 1..5; the descriptor stores the name string, the entry point cast to void (*)(), the size 6, FFTW_BACKWARD, FFTW_TWIDDLE, the integer 143 and the count 5 (equal to the number of twiddle_order entries), then the table itself. NOTE(review): 143 is presumably the codelet signature/id field of fftw_codelet_desc - confirm in fftw-int.h. */ +static const int twiddle_order[] = { 1, 2, 3, 4, 5 }; +fftw_codelet_desc fftwi_twiddle_6_desc = { + "fftwi_twiddle_6", + (void (*)()) fftwi_twiddle_6, + 6, + FFTW_BACKWARD, + FFTW_TWIDDLE, + 143, + 5, + twiddle_order, +}; diff --git a/src/fftw/ftwi_64.c b/src/fftw/ftwi_64.c new file mode 100644 index 0000000..3c35cb9 --- /dev/null +++ b/src/fftw/ftwi_64.c @@ -0,0 +1,3207 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Mon Mar 24 02:08:40 EST 2003 */ + +#include "fftw-int.h" +#include "fftw.h" + +/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -twiddleinv 64 */ + +/* + * This function contains 1038 FP additions, 500 FP multiplications, + * (or, 808 additions, 270 multiplications, 230 fused multiply/add), + * 162 stack variables, and 256 memory accesses + */ +static const fftw_real K098017140 = +FFTW_KONST(+0.098017140329560601994195563888641845861136673); /* value of sin(pi/32) */ +static const fftw_real K995184726 = +FFTW_KONST(+0.995184726672196886244836953109479921575474869); /* value of cos(pi/32) */ +static const fftw_real K773010453 = +FFTW_KONST(+0.773010453362736960810906609758469800971041293); /* value of cos(7*pi/32) */ +static const fftw_real K634393284 = +FFTW_KONST(+0.634393284163645498215171613225493370675687095); /* value of sin(7*pi/32) */ +static const fftw_real K195090322 = +FFTW_KONST(+0.195090322016128267848284868477022240927691618); /* value of sin(pi/16) */ +static const fftw_real K980785280 = +FFTW_KONST(+0.980785280403230449126182236134239036973933731); /* value of cos(pi/16) */ +static const fftw_real K471396736 = +FFTW_KONST(+0.471396736825997648556387625905254377657460319); /* value of sin(5*pi/32) */ +static const fftw_real K881921264 = +FFTW_KONST(+0.881921264348355029712756863660388349508442621); /* value of cos(5*pi/32) */ +static const fftw_real K956940335 = +FFTW_KONST(+0.956940335732208864935797886980269969482849206); /* value of cos(3*pi/32) */ +static const fftw_real K290284677 = +FFTW_KONST(+0.290284677254462367636192375817395274691476278); /* value of sin(3*pi/32) */ +static const fftw_real K831469612 = +FFTW_KONST(+0.831469612302545237078788377617905756738560812); /* value of cos(3*pi/16) */ +static const fftw_real K555570233 = +FFTW_KONST(+0.555570233019602224742830813948532874374937191); /* value of sin(3*pi/16) */ +static
const fftw_real K707106781 = +FFTW_KONST(+0.707106781186547524400844362104849039284835938); /* value of sqrt(2)/2 */ +static const fftw_real K382683432 = +FFTW_KONST(+0.382683432365089771728459984030398866761344562); /* value of sin(pi/8) */ +static const fftw_real K923879532 = +FFTW_KONST(+0.923879532511286756128183189396788286822416626); /* value of cos(pi/8) */ + +/* + * Generator Id's : + * $Id: ftwi_64.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: ftwi_64.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: ftwi_64.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + */ + +void fftwi_twiddle_64(fftw_complex *A, const fftw_complex *W, int iostride, + int m, int dist) +{ + int i; + fftw_complex *inout; + inout = A; + for (i = m; i > 0; i = i - 1, inout = inout + dist, W = W + 63) { + fftw_real tmp19; + fftw_real tmp791; + fftw_real tmp1109; + fftw_real tmp1139; + fftw_real tmp1047; + fftw_real tmp1077; + fftw_real tmp383; + fftw_real tmp655; + fftw_real tmp66; + fftw_real tmp800; + fftw_real tmp909; + fftw_real tmp993; + fftw_real tmp417; + fftw_real tmp608; + fftw_real tmp665; + fftw_real tmp744; + fftw_real tmp42; + fftw_real tmp1076; + fftw_real tmp794; + fftw_real tmp1042; + fftw_real tmp394; + fftw_real tmp1138; + fftw_real tmp658; + fftw_real tmp1106; + fftw_real tmp329; + fftw_real tmp1007; + fftw_real tmp863; + fftw_real tmp923; + fftw_real tmp976; + fftw_real tmp1004; + fftw_real tmp880; + fftw_real tmp920; + fftw_real tmp535; + fftw_real tmp703; + fftw_real tmp576; + fftw_real tmp714; + fftw_real tmp579; + fftw_real tmp704; + fftw_real tmp546; + fftw_real tmp715; + fftw_real tmp376; + fftw_real tmp1005; + fftw_real tmp868; + fftw_real tmp881; + fftw_real tmp979; + fftw_real tmp1008; + fftw_real tmp873; + fftw_real tmp882; + fftw_real tmp558; + fftw_real tmp582; + fftw_real tmp708; + fftw_real tmp718; + fftw_real tmp569; + fftw_real tmp581; + fftw_real tmp711; + fftw_real tmp717; + fftw_real tmp89; + fftw_real tmp805; + fftw_real tmp908; + fftw_real tmp992; + fftw_real tmp406; + fftw_real tmp609; + fftw_real tmp662; + fftw_real tmp745; +
fftw_real tmp161; + fftw_real tmp184; + fftw_real tmp997; + fftw_real tmp812; + fftw_real tmp912; + fftw_real tmp956; + fftw_real tmp957; + fftw_real tmp996; + fftw_real tmp817; + fftw_real tmp913; + fftw_real tmp424; + fftw_real tmp668; + fftw_real tmp441; + fftw_real tmp671; + fftw_real tmp444; + fftw_real tmp669; + fftw_real tmp435; + fftw_real tmp672; + fftw_real tmp114; + fftw_real tmp137; + fftw_real tmp999; + fftw_real tmp823; + fftw_real tmp915; + fftw_real tmp959; + fftw_real tmp960; + fftw_real tmp1000; + fftw_real tmp828; + fftw_real tmp916; + fftw_real tmp451; + fftw_real tmp678; + fftw_real tmp468; + fftw_real tmp675; + fftw_real tmp471; + fftw_real tmp679; + fftw_real tmp462; + fftw_real tmp676; + fftw_real tmp234; + fftw_real tmp1014; + fftw_real tmp836; + fftw_real tmp930; + fftw_real tmp967; + fftw_real tmp1011; + fftw_real tmp853; + fftw_real tmp927; + fftw_real tmp480; + fftw_real tmp684; + fftw_real tmp521; + fftw_real tmp695; + fftw_real tmp524; + fftw_real tmp685; + fftw_real tmp491; + fftw_real tmp696; + fftw_real tmp281; + fftw_real tmp1012; + fftw_real tmp841; + fftw_real tmp854; + fftw_real tmp970; + fftw_real tmp1015; + fftw_real tmp846; + fftw_real tmp855; + fftw_real tmp503; + fftw_real tmp527; + fftw_real tmp689; + fftw_real tmp699; + fftw_real tmp514; + fftw_real tmp526; + fftw_real tmp692; + fftw_real tmp698; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp1; + fftw_real tmp1045; + fftw_real tmp6; + fftw_real tmp1044; + fftw_real tmp12; + fftw_real tmp380; + fftw_real tmp17; + fftw_real tmp381; + ASSERT_ALIGNED_DOUBLE; + tmp1 = c_re(inout[0]); + tmp1045 = c_im(inout[0]); + { + fftw_real tmp3; + fftw_real tmp5; + fftw_real tmp2; + fftw_real tmp4; + ASSERT_ALIGNED_DOUBLE; + tmp3 = c_re(inout[32 * iostride]); + tmp5 = c_im(inout[32 * iostride]); + tmp2 = c_re(W[31]); + tmp4 = c_im(W[31]); + tmp6 = (tmp2 * tmp3) + (tmp4 * tmp5); + tmp1044 = (tmp2 * tmp5) - (tmp4 * tmp3); + } + { + fftw_real tmp9; + fftw_real tmp11; + fftw_real tmp8; + 
fftw_real tmp10; + ASSERT_ALIGNED_DOUBLE; + tmp9 = c_re(inout[16 * iostride]); + tmp11 = c_im(inout[16 * iostride]); + tmp8 = c_re(W[15]); + tmp10 = c_im(W[15]); + tmp12 = (tmp8 * tmp9) + (tmp10 * tmp11); + tmp380 = (tmp8 * tmp11) - (tmp10 * tmp9); + } + { + fftw_real tmp14; + fftw_real tmp16; + fftw_real tmp13; + fftw_real tmp15; + ASSERT_ALIGNED_DOUBLE; + tmp14 = c_re(inout[48 * iostride]); + tmp16 = c_im(inout[48 * iostride]); + tmp13 = c_re(W[47]); + tmp15 = c_im(W[47]); + tmp17 = (tmp13 * tmp14) + (tmp15 * tmp16); + tmp381 = (tmp13 * tmp16) - (tmp15 * tmp14); + } + { + fftw_real tmp7; + fftw_real tmp18; + fftw_real tmp1107; + fftw_real tmp1108; + ASSERT_ALIGNED_DOUBLE; + tmp7 = tmp1 + tmp6; + tmp18 = tmp12 + tmp17; + tmp19 = tmp7 + tmp18; + tmp791 = tmp7 - tmp18; + tmp1107 = tmp12 - tmp17; + tmp1108 = tmp1045 - tmp1044; + tmp1109 = tmp1107 + tmp1108; + tmp1139 = tmp1108 - tmp1107; + } + { + fftw_real tmp1043; + fftw_real tmp1046; + fftw_real tmp379; + fftw_real tmp382; + ASSERT_ALIGNED_DOUBLE; + tmp1043 = tmp380 + tmp381; + tmp1046 = tmp1044 + tmp1045; + tmp1047 = tmp1043 + tmp1046; + tmp1077 = tmp1046 - tmp1043; + tmp379 = tmp1 - tmp6; + tmp382 = tmp380 - tmp381; + tmp383 = tmp379 - tmp382; + tmp655 = tmp379 + tmp382; + } + } + { + fftw_real tmp54; + fftw_real tmp412; + fftw_real tmp409; + fftw_real tmp797; + fftw_real tmp65; + fftw_real tmp410; + fftw_real tmp415; + fftw_real tmp798; + fftw_real tmp796; + fftw_real tmp799; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp48; + fftw_real tmp407; + fftw_real tmp53; + fftw_real tmp408; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp45; + fftw_real tmp47; + fftw_real tmp44; + fftw_real tmp46; + ASSERT_ALIGNED_DOUBLE; + tmp45 = c_re(inout[4 * iostride]); + tmp47 = c_im(inout[4 * iostride]); + tmp44 = c_re(W[3]); + tmp46 = c_im(W[3]); + tmp48 = (tmp44 * tmp45) + (tmp46 * tmp47); + tmp407 = (tmp44 * tmp47) - (tmp46 * tmp45); + } + { + fftw_real tmp50; + fftw_real tmp52; + fftw_real tmp49; + fftw_real tmp51; + 
ASSERT_ALIGNED_DOUBLE; + tmp50 = c_re(inout[36 * iostride]); + tmp52 = c_im(inout[36 * iostride]); + tmp49 = c_re(W[35]); + tmp51 = c_im(W[35]); + tmp53 = (tmp49 * tmp50) + (tmp51 * tmp52); + tmp408 = (tmp49 * tmp52) - (tmp51 * tmp50); + } + tmp54 = tmp48 + tmp53; + tmp412 = tmp48 - tmp53; + tmp409 = tmp407 - tmp408; + tmp797 = tmp407 + tmp408; + } + { + fftw_real tmp59; + fftw_real tmp413; + fftw_real tmp64; + fftw_real tmp414; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp56; + fftw_real tmp58; + fftw_real tmp55; + fftw_real tmp57; + ASSERT_ALIGNED_DOUBLE; + tmp56 = c_re(inout[20 * iostride]); + tmp58 = c_im(inout[20 * iostride]); + tmp55 = c_re(W[19]); + tmp57 = c_im(W[19]); + tmp59 = (tmp55 * tmp56) + (tmp57 * tmp58); + tmp413 = (tmp55 * tmp58) - (tmp57 * tmp56); + } + { + fftw_real tmp61; + fftw_real tmp63; + fftw_real tmp60; + fftw_real tmp62; + ASSERT_ALIGNED_DOUBLE; + tmp61 = c_re(inout[52 * iostride]); + tmp63 = c_im(inout[52 * iostride]); + tmp60 = c_re(W[51]); + tmp62 = c_im(W[51]); + tmp64 = (tmp60 * tmp61) + (tmp62 * tmp63); + tmp414 = (tmp60 * tmp63) - (tmp62 * tmp61); + } + tmp65 = tmp59 + tmp64; + tmp410 = tmp59 - tmp64; + tmp415 = tmp413 - tmp414; + tmp798 = tmp413 + tmp414; + } + tmp66 = tmp54 + tmp65; + tmp796 = tmp54 - tmp65; + tmp799 = tmp797 - tmp798; + tmp800 = tmp796 - tmp799; + tmp909 = tmp796 + tmp799; + tmp993 = tmp797 + tmp798; + { + fftw_real tmp411; + fftw_real tmp416; + fftw_real tmp663; + fftw_real tmp664; + ASSERT_ALIGNED_DOUBLE; + tmp411 = tmp409 + tmp410; + tmp416 = tmp412 - tmp415; + tmp417 = (K923879532 * tmp411) + (K382683432 * tmp416); + tmp608 = (K923879532 * tmp416) - (K382683432 * tmp411); + tmp663 = tmp409 - tmp410; + tmp664 = tmp412 + tmp415; + tmp665 = (K382683432 * tmp663) + (K923879532 * tmp664); + tmp744 = (K382683432 * tmp664) - (K923879532 * tmp663); + } + } + { + fftw_real tmp24; + fftw_real tmp385; + fftw_real tmp29; + fftw_real tmp386; + fftw_real tmp384; + fftw_real tmp387; + fftw_real tmp35; + fftw_real tmp390; + 
fftw_real tmp40; + fftw_real tmp391; + fftw_real tmp389; + fftw_real tmp392; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp21; + fftw_real tmp23; + fftw_real tmp20; + fftw_real tmp22; + ASSERT_ALIGNED_DOUBLE; + tmp21 = c_re(inout[8 * iostride]); + tmp23 = c_im(inout[8 * iostride]); + tmp20 = c_re(W[7]); + tmp22 = c_im(W[7]); + tmp24 = (tmp20 * tmp21) + (tmp22 * tmp23); + tmp385 = (tmp20 * tmp23) - (tmp22 * tmp21); + } + { + fftw_real tmp26; + fftw_real tmp28; + fftw_real tmp25; + fftw_real tmp27; + ASSERT_ALIGNED_DOUBLE; + tmp26 = c_re(inout[40 * iostride]); + tmp28 = c_im(inout[40 * iostride]); + tmp25 = c_re(W[39]); + tmp27 = c_im(W[39]); + tmp29 = (tmp25 * tmp26) + (tmp27 * tmp28); + tmp386 = (tmp25 * tmp28) - (tmp27 * tmp26); + } + tmp384 = tmp24 - tmp29; + tmp387 = tmp385 - tmp386; + { + fftw_real tmp32; + fftw_real tmp34; + fftw_real tmp31; + fftw_real tmp33; + ASSERT_ALIGNED_DOUBLE; + tmp32 = c_re(inout[56 * iostride]); + tmp34 = c_im(inout[56 * iostride]); + tmp31 = c_re(W[55]); + tmp33 = c_im(W[55]); + tmp35 = (tmp31 * tmp32) + (tmp33 * tmp34); + tmp390 = (tmp31 * tmp34) - (tmp33 * tmp32); + } + { + fftw_real tmp37; + fftw_real tmp39; + fftw_real tmp36; + fftw_real tmp38; + ASSERT_ALIGNED_DOUBLE; + tmp37 = c_re(inout[24 * iostride]); + tmp39 = c_im(inout[24 * iostride]); + tmp36 = c_re(W[23]); + tmp38 = c_im(W[23]); + tmp40 = (tmp36 * tmp37) + (tmp38 * tmp39); + tmp391 = (tmp36 * tmp39) - (tmp38 * tmp37); + } + tmp389 = tmp35 - tmp40; + tmp392 = tmp390 - tmp391; + { + fftw_real tmp30; + fftw_real tmp41; + fftw_real tmp792; + fftw_real tmp793; + ASSERT_ALIGNED_DOUBLE; + tmp30 = tmp24 + tmp29; + tmp41 = tmp35 + tmp40; + tmp42 = tmp30 + tmp41; + tmp1076 = tmp30 - tmp41; + tmp792 = tmp390 + tmp391; + tmp793 = tmp385 + tmp386; + tmp794 = tmp792 - tmp793; + tmp1042 = tmp793 + tmp792; + } + { + fftw_real tmp388; + fftw_real tmp393; + fftw_real tmp656; + fftw_real tmp657; + ASSERT_ALIGNED_DOUBLE; + tmp388 = tmp384 - tmp387; + tmp393 = tmp389 + tmp392; + tmp394 = 
K707106781 * (tmp388 + tmp393); + tmp1138 = K707106781 * (tmp388 - tmp393); + tmp656 = tmp392 - tmp389; + tmp657 = tmp384 + tmp387; + tmp658 = K707106781 * (tmp656 - tmp657); + tmp1106 = K707106781 * (tmp657 + tmp656); + } + } + { + fftw_real tmp287; + fftw_real tmp572; + fftw_real tmp292; + fftw_real tmp573; + fftw_real tmp293; + fftw_real tmp876; + fftw_real tmp327; + fftw_real tmp541; + fftw_real tmp544; + fftw_real tmp860; + fftw_real tmp298; + fftw_real tmp532; + fftw_real tmp303; + fftw_real tmp533; + fftw_real tmp304; + fftw_real tmp877; + fftw_real tmp316; + fftw_real tmp536; + fftw_real tmp539; + fftw_real tmp861; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp284; + fftw_real tmp286; + fftw_real tmp283; + fftw_real tmp285; + ASSERT_ALIGNED_DOUBLE; + tmp284 = c_re(inout[63 * iostride]); + tmp286 = c_im(inout[63 * iostride]); + tmp283 = c_re(W[62]); + tmp285 = c_im(W[62]); + tmp287 = (tmp283 * tmp284) + (tmp285 * tmp286); + tmp572 = (tmp283 * tmp286) - (tmp285 * tmp284); + } + { + fftw_real tmp289; + fftw_real tmp291; + fftw_real tmp288; + fftw_real tmp290; + ASSERT_ALIGNED_DOUBLE; + tmp289 = c_re(inout[31 * iostride]); + tmp291 = c_im(inout[31 * iostride]); + tmp288 = c_re(W[30]); + tmp290 = c_im(W[30]); + tmp292 = (tmp288 * tmp289) + (tmp290 * tmp291); + tmp573 = (tmp288 * tmp291) - (tmp290 * tmp289); + } + tmp293 = tmp287 + tmp292; + tmp876 = tmp572 + tmp573; + { + fftw_real tmp321; + fftw_real tmp542; + fftw_real tmp326; + fftw_real tmp543; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp318; + fftw_real tmp320; + fftw_real tmp317; + fftw_real tmp319; + ASSERT_ALIGNED_DOUBLE; + tmp318 = c_re(inout[55 * iostride]); + tmp320 = c_im(inout[55 * iostride]); + tmp317 = c_re(W[54]); + tmp319 = c_im(W[54]); + tmp321 = (tmp317 * tmp318) + (tmp319 * tmp320); + tmp542 = (tmp317 * tmp320) - (tmp319 * tmp318); + } + { + fftw_real tmp323; + fftw_real tmp325; + fftw_real tmp322; + fftw_real tmp324; + ASSERT_ALIGNED_DOUBLE; + tmp323 = c_re(inout[23 * iostride]); + tmp325 = 
c_im(inout[23 * iostride]); + tmp322 = c_re(W[22]); + tmp324 = c_im(W[22]); + tmp326 = (tmp322 * tmp323) + (tmp324 * tmp325); + tmp543 = (tmp322 * tmp325) - (tmp324 * tmp323); + } + tmp327 = tmp321 + tmp326; + tmp541 = tmp321 - tmp326; + tmp544 = tmp542 - tmp543; + tmp860 = tmp542 + tmp543; + } + { + fftw_real tmp295; + fftw_real tmp297; + fftw_real tmp294; + fftw_real tmp296; + ASSERT_ALIGNED_DOUBLE; + tmp295 = c_re(inout[15 * iostride]); + tmp297 = c_im(inout[15 * iostride]); + tmp294 = c_re(W[14]); + tmp296 = c_im(W[14]); + tmp298 = (tmp294 * tmp295) + (tmp296 * tmp297); + tmp532 = (tmp294 * tmp297) - (tmp296 * tmp295); + } + { + fftw_real tmp300; + fftw_real tmp302; + fftw_real tmp299; + fftw_real tmp301; + ASSERT_ALIGNED_DOUBLE; + tmp300 = c_re(inout[47 * iostride]); + tmp302 = c_im(inout[47 * iostride]); + tmp299 = c_re(W[46]); + tmp301 = c_im(W[46]); + tmp303 = (tmp299 * tmp300) + (tmp301 * tmp302); + tmp533 = (tmp299 * tmp302) - (tmp301 * tmp300); + } + tmp304 = tmp298 + tmp303; + tmp877 = tmp532 + tmp533; + { + fftw_real tmp310; + fftw_real tmp537; + fftw_real tmp315; + fftw_real tmp538; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp307; + fftw_real tmp309; + fftw_real tmp306; + fftw_real tmp308; + ASSERT_ALIGNED_DOUBLE; + tmp307 = c_re(inout[7 * iostride]); + tmp309 = c_im(inout[7 * iostride]); + tmp306 = c_re(W[6]); + tmp308 = c_im(W[6]); + tmp310 = (tmp306 * tmp307) + (tmp308 * tmp309); + tmp537 = (tmp306 * tmp309) - (tmp308 * tmp307); + } + { + fftw_real tmp312; + fftw_real tmp314; + fftw_real tmp311; + fftw_real tmp313; + ASSERT_ALIGNED_DOUBLE; + tmp312 = c_re(inout[39 * iostride]); + tmp314 = c_im(inout[39 * iostride]); + tmp311 = c_re(W[38]); + tmp313 = c_im(W[38]); + tmp315 = (tmp311 * tmp312) + (tmp313 * tmp314); + tmp538 = (tmp311 * tmp314) - (tmp313 * tmp312); + } + tmp316 = tmp310 + tmp315; + tmp536 = tmp310 - tmp315; + tmp539 = tmp537 - tmp538; + tmp861 = tmp537 + tmp538; + } + { + fftw_real tmp305; + fftw_real tmp328; + fftw_real tmp859; + 
fftw_real tmp862; + ASSERT_ALIGNED_DOUBLE; + tmp305 = tmp293 + tmp304; + tmp328 = tmp316 + tmp327; + tmp329 = tmp305 + tmp328; + tmp1007 = tmp305 - tmp328; + tmp859 = tmp293 - tmp304; + tmp862 = tmp860 - tmp861; + tmp863 = tmp859 + tmp862; + tmp923 = tmp859 - tmp862; + } + { + fftw_real tmp974; + fftw_real tmp975; + fftw_real tmp878; + fftw_real tmp879; + ASSERT_ALIGNED_DOUBLE; + tmp974 = tmp876 + tmp877; + tmp975 = tmp861 + tmp860; + tmp976 = tmp974 + tmp975; + tmp1004 = tmp974 - tmp975; + tmp878 = tmp876 - tmp877; + tmp879 = tmp316 - tmp327; + tmp880 = tmp878 + tmp879; + tmp920 = tmp878 - tmp879; + } + { + fftw_real tmp531; + fftw_real tmp534; + fftw_real tmp574; + fftw_real tmp575; + ASSERT_ALIGNED_DOUBLE; + tmp531 = tmp287 - tmp292; + tmp534 = tmp532 - tmp533; + tmp535 = tmp531 - tmp534; + tmp703 = tmp531 + tmp534; + tmp574 = tmp572 - tmp573; + tmp575 = tmp298 - tmp303; + tmp576 = tmp574 + tmp575; + tmp714 = tmp574 - tmp575; + } + { + fftw_real tmp577; + fftw_real tmp578; + fftw_real tmp540; + fftw_real tmp545; + ASSERT_ALIGNED_DOUBLE; + tmp577 = tmp536 + tmp539; + tmp578 = tmp544 - tmp541; + tmp579 = K707106781 * (tmp577 + tmp578); + tmp704 = K707106781 * (tmp578 - tmp577); + tmp540 = tmp536 - tmp539; + tmp545 = tmp541 + tmp544; + tmp546 = K707106781 * (tmp540 + tmp545); + tmp715 = K707106781 * (tmp540 - tmp545); + } + } + { + fftw_real tmp340; + fftw_real tmp564; + fftw_real tmp561; + fftw_real tmp865; + fftw_real tmp374; + fftw_real tmp551; + fftw_real tmp556; + fftw_real tmp871; + fftw_real tmp351; + fftw_real tmp562; + fftw_real tmp567; + fftw_real tmp866; + fftw_real tmp363; + fftw_real tmp553; + fftw_real tmp550; + fftw_real tmp870; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp334; + fftw_real tmp559; + fftw_real tmp339; + fftw_real tmp560; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp331; + fftw_real tmp333; + fftw_real tmp330; + fftw_real tmp332; + ASSERT_ALIGNED_DOUBLE; + tmp331 = c_re(inout[3 * iostride]); + tmp333 = c_im(inout[3 * iostride]); + tmp330 
= c_re(W[2]); + tmp332 = c_im(W[2]); + tmp334 = (tmp330 * tmp331) + (tmp332 * tmp333); + tmp559 = (tmp330 * tmp333) - (tmp332 * tmp331); + } + { + fftw_real tmp336; + fftw_real tmp338; + fftw_real tmp335; + fftw_real tmp337; + ASSERT_ALIGNED_DOUBLE; + tmp336 = c_re(inout[35 * iostride]); + tmp338 = c_im(inout[35 * iostride]); + tmp335 = c_re(W[34]); + tmp337 = c_im(W[34]); + tmp339 = (tmp335 * tmp336) + (tmp337 * tmp338); + tmp560 = (tmp335 * tmp338) - (tmp337 * tmp336); + } + tmp340 = tmp334 + tmp339; + tmp564 = tmp334 - tmp339; + tmp561 = tmp559 - tmp560; + tmp865 = tmp559 + tmp560; + } + { + fftw_real tmp368; + fftw_real tmp554; + fftw_real tmp373; + fftw_real tmp555; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp365; + fftw_real tmp367; + fftw_real tmp364; + fftw_real tmp366; + ASSERT_ALIGNED_DOUBLE; + tmp365 = c_re(inout[11 * iostride]); + tmp367 = c_im(inout[11 * iostride]); + tmp364 = c_re(W[10]); + tmp366 = c_im(W[10]); + tmp368 = (tmp364 * tmp365) + (tmp366 * tmp367); + tmp554 = (tmp364 * tmp367) - (tmp366 * tmp365); + } + { + fftw_real tmp370; + fftw_real tmp372; + fftw_real tmp369; + fftw_real tmp371; + ASSERT_ALIGNED_DOUBLE; + tmp370 = c_re(inout[43 * iostride]); + tmp372 = c_im(inout[43 * iostride]); + tmp369 = c_re(W[42]); + tmp371 = c_im(W[42]); + tmp373 = (tmp369 * tmp370) + (tmp371 * tmp372); + tmp555 = (tmp369 * tmp372) - (tmp371 * tmp370); + } + tmp374 = tmp368 + tmp373; + tmp551 = tmp368 - tmp373; + tmp556 = tmp554 - tmp555; + tmp871 = tmp554 + tmp555; + } + { + fftw_real tmp345; + fftw_real tmp565; + fftw_real tmp350; + fftw_real tmp566; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp342; + fftw_real tmp344; + fftw_real tmp341; + fftw_real tmp343; + ASSERT_ALIGNED_DOUBLE; + tmp342 = c_re(inout[19 * iostride]); + tmp344 = c_im(inout[19 * iostride]); + tmp341 = c_re(W[18]); + tmp343 = c_im(W[18]); + tmp345 = (tmp341 * tmp342) + (tmp343 * tmp344); + tmp565 = (tmp341 * tmp344) - (tmp343 * tmp342); + } + { + fftw_real tmp347; + fftw_real tmp349; + 
fftw_real tmp346; + fftw_real tmp348; + ASSERT_ALIGNED_DOUBLE; + tmp347 = c_re(inout[51 * iostride]); + tmp349 = c_im(inout[51 * iostride]); + tmp346 = c_re(W[50]); + tmp348 = c_im(W[50]); + tmp350 = (tmp346 * tmp347) + (tmp348 * tmp349); + tmp566 = (tmp346 * tmp349) - (tmp348 * tmp347); + } + tmp351 = tmp345 + tmp350; + tmp562 = tmp345 - tmp350; + tmp567 = tmp565 - tmp566; + tmp866 = tmp565 + tmp566; + } + { + fftw_real tmp357; + fftw_real tmp548; + fftw_real tmp362; + fftw_real tmp549; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp354; + fftw_real tmp356; + fftw_real tmp353; + fftw_real tmp355; + ASSERT_ALIGNED_DOUBLE; + tmp354 = c_re(inout[59 * iostride]); + tmp356 = c_im(inout[59 * iostride]); + tmp353 = c_re(W[58]); + tmp355 = c_im(W[58]); + tmp357 = (tmp353 * tmp354) + (tmp355 * tmp356); + tmp548 = (tmp353 * tmp356) - (tmp355 * tmp354); + } + { + fftw_real tmp359; + fftw_real tmp361; + fftw_real tmp358; + fftw_real tmp360; + ASSERT_ALIGNED_DOUBLE; + tmp359 = c_re(inout[27 * iostride]); + tmp361 = c_im(inout[27 * iostride]); + tmp358 = c_re(W[26]); + tmp360 = c_im(W[26]); + tmp362 = (tmp358 * tmp359) + (tmp360 * tmp361); + tmp549 = (tmp358 * tmp361) - (tmp360 * tmp359); + } + tmp363 = tmp357 + tmp362; + tmp553 = tmp357 - tmp362; + tmp550 = tmp548 - tmp549; + tmp870 = tmp548 + tmp549; + } + { + fftw_real tmp352; + fftw_real tmp375; + fftw_real tmp864; + fftw_real tmp867; + ASSERT_ALIGNED_DOUBLE; + tmp352 = tmp340 + tmp351; + tmp375 = tmp363 + tmp374; + tmp376 = tmp352 + tmp375; + tmp1005 = tmp352 - tmp375; + tmp864 = tmp340 - tmp351; + tmp867 = tmp865 - tmp866; + tmp868 = tmp864 - tmp867; + tmp881 = tmp864 + tmp867; + } + { + fftw_real tmp977; + fftw_real tmp978; + fftw_real tmp869; + fftw_real tmp872; + ASSERT_ALIGNED_DOUBLE; + tmp977 = tmp865 + tmp866; + tmp978 = tmp870 + tmp871; + tmp979 = tmp977 + tmp978; + tmp1008 = tmp978 - tmp977; + tmp869 = tmp363 - tmp374; + tmp872 = tmp870 - tmp871; + tmp873 = tmp869 + tmp872; + tmp882 = tmp872 - tmp869; + } + { + 
fftw_real tmp552; + fftw_real tmp557; + fftw_real tmp706; + fftw_real tmp707; + ASSERT_ALIGNED_DOUBLE; + tmp552 = tmp550 + tmp551; + tmp557 = tmp553 - tmp556; + tmp558 = (K923879532 * tmp552) - (K382683432 * tmp557); + tmp582 = (K382683432 * tmp552) + (K923879532 * tmp557); + tmp706 = tmp550 - tmp551; + tmp707 = tmp553 + tmp556; + tmp708 = (K382683432 * tmp706) - (K923879532 * tmp707); + tmp718 = (K923879532 * tmp706) + (K382683432 * tmp707); + } + { + fftw_real tmp563; + fftw_real tmp568; + fftw_real tmp709; + fftw_real tmp710; + ASSERT_ALIGNED_DOUBLE; + tmp563 = tmp561 + tmp562; + tmp568 = tmp564 - tmp567; + tmp569 = (K923879532 * tmp563) + (K382683432 * tmp568); + tmp581 = (K923879532 * tmp568) - (K382683432 * tmp563); + tmp709 = tmp561 - tmp562; + tmp710 = tmp564 + tmp567; + tmp711 = (K382683432 * tmp709) + (K923879532 * tmp710); + tmp717 = (K382683432 * tmp710) - (K923879532 * tmp709); + } + } + { + fftw_real tmp77; + fftw_real tmp401; + fftw_real tmp398; + fftw_real tmp802; + fftw_real tmp88; + fftw_real tmp399; + fftw_real tmp404; + fftw_real tmp803; + fftw_real tmp801; + fftw_real tmp804; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp71; + fftw_real tmp396; + fftw_real tmp76; + fftw_real tmp397; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp68; + fftw_real tmp70; + fftw_real tmp67; + fftw_real tmp69; + ASSERT_ALIGNED_DOUBLE; + tmp68 = c_re(inout[60 * iostride]); + tmp70 = c_im(inout[60 * iostride]); + tmp67 = c_re(W[59]); + tmp69 = c_im(W[59]); + tmp71 = (tmp67 * tmp68) + (tmp69 * tmp70); + tmp396 = (tmp67 * tmp70) - (tmp69 * tmp68); + } + { + fftw_real tmp73; + fftw_real tmp75; + fftw_real tmp72; + fftw_real tmp74; + ASSERT_ALIGNED_DOUBLE; + tmp73 = c_re(inout[28 * iostride]); + tmp75 = c_im(inout[28 * iostride]); + tmp72 = c_re(W[27]); + tmp74 = c_im(W[27]); + tmp76 = (tmp72 * tmp73) + (tmp74 * tmp75); + tmp397 = (tmp72 * tmp75) - (tmp74 * tmp73); + } + tmp77 = tmp71 + tmp76; + tmp401 = tmp71 - tmp76; + tmp398 = tmp396 - tmp397; + tmp802 = tmp396 + tmp397; + 
} + { + fftw_real tmp82; + fftw_real tmp402; + fftw_real tmp87; + fftw_real tmp403; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp79; + fftw_real tmp81; + fftw_real tmp78; + fftw_real tmp80; + ASSERT_ALIGNED_DOUBLE; + tmp79 = c_re(inout[12 * iostride]); + tmp81 = c_im(inout[12 * iostride]); + tmp78 = c_re(W[11]); + tmp80 = c_im(W[11]); + tmp82 = (tmp78 * tmp79) + (tmp80 * tmp81); + tmp402 = (tmp78 * tmp81) - (tmp80 * tmp79); + } + { + fftw_real tmp84; + fftw_real tmp86; + fftw_real tmp83; + fftw_real tmp85; + ASSERT_ALIGNED_DOUBLE; + tmp84 = c_re(inout[44 * iostride]); + tmp86 = c_im(inout[44 * iostride]); + tmp83 = c_re(W[43]); + tmp85 = c_im(W[43]); + tmp87 = (tmp83 * tmp84) + (tmp85 * tmp86); + tmp403 = (tmp83 * tmp86) - (tmp85 * tmp84); + } + tmp88 = tmp82 + tmp87; + tmp399 = tmp82 - tmp87; + tmp404 = tmp402 - tmp403; + tmp803 = tmp402 + tmp403; + } + tmp89 = tmp77 + tmp88; + tmp801 = tmp77 - tmp88; + tmp804 = tmp802 - tmp803; + tmp805 = tmp801 + tmp804; + tmp908 = tmp804 - tmp801; + tmp992 = tmp802 + tmp803; + { + fftw_real tmp400; + fftw_real tmp405; + fftw_real tmp660; + fftw_real tmp661; + ASSERT_ALIGNED_DOUBLE; + tmp400 = tmp398 + tmp399; + tmp405 = tmp401 - tmp404; + tmp406 = (K923879532 * tmp400) - (K382683432 * tmp405); + tmp609 = (K382683432 * tmp400) + (K923879532 * tmp405); + tmp660 = tmp398 - tmp399; + tmp661 = tmp401 + tmp404; + tmp662 = (K382683432 * tmp660) - (K923879532 * tmp661); + tmp745 = (K923879532 * tmp660) + (K382683432 * tmp661); + } + } + { + fftw_real tmp143; + fftw_real tmp420; + fftw_real tmp148; + fftw_real tmp421; + fftw_real tmp149; + fftw_real tmp808; + fftw_real tmp183; + fftw_real tmp433; + fftw_real tmp432; + fftw_real tmp814; + fftw_real tmp154; + fftw_real tmp438; + fftw_real tmp159; + fftw_real tmp439; + fftw_real tmp160; + fftw_real tmp809; + fftw_real tmp172; + fftw_real tmp425; + fftw_real tmp428; + fftw_real tmp815; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp140; + fftw_real tmp142; + fftw_real tmp139; + fftw_real 
tmp141; + ASSERT_ALIGNED_DOUBLE; + tmp140 = c_re(inout[62 * iostride]); + tmp142 = c_im(inout[62 * iostride]); + tmp139 = c_re(W[61]); + tmp141 = c_im(W[61]); + tmp143 = (tmp139 * tmp140) + (tmp141 * tmp142); + tmp420 = (tmp139 * tmp142) - (tmp141 * tmp140); + } + { + fftw_real tmp145; + fftw_real tmp147; + fftw_real tmp144; + fftw_real tmp146; + ASSERT_ALIGNED_DOUBLE; + tmp145 = c_re(inout[30 * iostride]); + tmp147 = c_im(inout[30 * iostride]); + tmp144 = c_re(W[29]); + tmp146 = c_im(W[29]); + tmp148 = (tmp144 * tmp145) + (tmp146 * tmp147); + tmp421 = (tmp144 * tmp147) - (tmp146 * tmp145); + } + tmp149 = tmp143 + tmp148; + tmp808 = tmp420 + tmp421; + { + fftw_real tmp177; + fftw_real tmp430; + fftw_real tmp182; + fftw_real tmp431; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp174; + fftw_real tmp176; + fftw_real tmp173; + fftw_real tmp175; + ASSERT_ALIGNED_DOUBLE; + tmp174 = c_re(inout[54 * iostride]); + tmp176 = c_im(inout[54 * iostride]); + tmp173 = c_re(W[53]); + tmp175 = c_im(W[53]); + tmp177 = (tmp173 * tmp174) + (tmp175 * tmp176); + tmp430 = (tmp173 * tmp176) - (tmp175 * tmp174); + } + { + fftw_real tmp179; + fftw_real tmp181; + fftw_real tmp178; + fftw_real tmp180; + ASSERT_ALIGNED_DOUBLE; + tmp179 = c_re(inout[22 * iostride]); + tmp181 = c_im(inout[22 * iostride]); + tmp178 = c_re(W[21]); + tmp180 = c_im(W[21]); + tmp182 = (tmp178 * tmp179) + (tmp180 * tmp181); + tmp431 = (tmp178 * tmp181) - (tmp180 * tmp179); + } + tmp183 = tmp177 + tmp182; + tmp433 = tmp177 - tmp182; + tmp432 = tmp430 - tmp431; + tmp814 = tmp430 + tmp431; + } + { + fftw_real tmp151; + fftw_real tmp153; + fftw_real tmp150; + fftw_real tmp152; + ASSERT_ALIGNED_DOUBLE; + tmp151 = c_re(inout[14 * iostride]); + tmp153 = c_im(inout[14 * iostride]); + tmp150 = c_re(W[13]); + tmp152 = c_im(W[13]); + tmp154 = (tmp150 * tmp151) + (tmp152 * tmp153); + tmp438 = (tmp150 * tmp153) - (tmp152 * tmp151); + } + { + fftw_real tmp156; + fftw_real tmp158; + fftw_real tmp155; + fftw_real tmp157; + 
ASSERT_ALIGNED_DOUBLE; + tmp156 = c_re(inout[46 * iostride]); + tmp158 = c_im(inout[46 * iostride]); + tmp155 = c_re(W[45]); + tmp157 = c_im(W[45]); + tmp159 = (tmp155 * tmp156) + (tmp157 * tmp158); + tmp439 = (tmp155 * tmp158) - (tmp157 * tmp156); + } + tmp160 = tmp154 + tmp159; + tmp809 = tmp438 + tmp439; + { + fftw_real tmp166; + fftw_real tmp426; + fftw_real tmp171; + fftw_real tmp427; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp163; + fftw_real tmp165; + fftw_real tmp162; + fftw_real tmp164; + ASSERT_ALIGNED_DOUBLE; + tmp163 = c_re(inout[6 * iostride]); + tmp165 = c_im(inout[6 * iostride]); + tmp162 = c_re(W[5]); + tmp164 = c_im(W[5]); + tmp166 = (tmp162 * tmp163) + (tmp164 * tmp165); + tmp426 = (tmp162 * tmp165) - (tmp164 * tmp163); + } + { + fftw_real tmp168; + fftw_real tmp170; + fftw_real tmp167; + fftw_real tmp169; + ASSERT_ALIGNED_DOUBLE; + tmp168 = c_re(inout[38 * iostride]); + tmp170 = c_im(inout[38 * iostride]); + tmp167 = c_re(W[37]); + tmp169 = c_im(W[37]); + tmp171 = (tmp167 * tmp168) + (tmp169 * tmp170); + tmp427 = (tmp167 * tmp170) - (tmp169 * tmp168); + } + tmp172 = tmp166 + tmp171; + tmp425 = tmp166 - tmp171; + tmp428 = tmp426 - tmp427; + tmp815 = tmp426 + tmp427; + } + { + fftw_real tmp810; + fftw_real tmp811; + fftw_real tmp813; + fftw_real tmp816; + ASSERT_ALIGNED_DOUBLE; + tmp161 = tmp149 + tmp160; + tmp184 = tmp172 + tmp183; + tmp997 = tmp161 - tmp184; + tmp810 = tmp808 - tmp809; + tmp811 = tmp172 - tmp183; + tmp812 = tmp810 + tmp811; + tmp912 = tmp810 - tmp811; + tmp956 = tmp808 + tmp809; + tmp957 = tmp815 + tmp814; + tmp996 = tmp956 - tmp957; + tmp813 = tmp149 - tmp160; + tmp816 = tmp814 - tmp815; + tmp817 = tmp813 + tmp816; + tmp913 = tmp813 - tmp816; + } + { + fftw_real tmp422; + fftw_real tmp423; + fftw_real tmp437; + fftw_real tmp440; + ASSERT_ALIGNED_DOUBLE; + tmp422 = tmp420 - tmp421; + tmp423 = tmp154 - tmp159; + tmp424 = tmp422 + tmp423; + tmp668 = tmp422 - tmp423; + tmp437 = tmp143 - tmp148; + tmp440 = tmp438 - tmp439; + tmp441 
= tmp437 - tmp440; + tmp671 = tmp437 + tmp440; + } + { + fftw_real tmp442; + fftw_real tmp443; + fftw_real tmp429; + fftw_real tmp434; + ASSERT_ALIGNED_DOUBLE; + tmp442 = tmp425 - tmp428; + tmp443 = tmp433 + tmp432; + tmp444 = K707106781 * (tmp442 + tmp443); + tmp669 = K707106781 * (tmp442 - tmp443); + tmp429 = tmp425 + tmp428; + tmp434 = tmp432 - tmp433; + tmp435 = K707106781 * (tmp429 + tmp434); + tmp672 = K707106781 * (tmp434 - tmp429); + } + } + { + fftw_real tmp96; + fftw_real tmp464; + fftw_real tmp101; + fftw_real tmp465; + fftw_real tmp102; + fftw_real tmp819; + fftw_real tmp136; + fftw_real tmp457; + fftw_real tmp460; + fftw_real tmp825; + fftw_real tmp107; + fftw_real tmp448; + fftw_real tmp112; + fftw_real tmp449; + fftw_real tmp113; + fftw_real tmp820; + fftw_real tmp125; + fftw_real tmp452; + fftw_real tmp455; + fftw_real tmp826; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp93; + fftw_real tmp95; + fftw_real tmp92; + fftw_real tmp94; + ASSERT_ALIGNED_DOUBLE; + tmp93 = c_re(inout[2 * iostride]); + tmp95 = c_im(inout[2 * iostride]); + tmp92 = c_re(W[1]); + tmp94 = c_im(W[1]); + tmp96 = (tmp92 * tmp93) + (tmp94 * tmp95); + tmp464 = (tmp92 * tmp95) - (tmp94 * tmp93); + } + { + fftw_real tmp98; + fftw_real tmp100; + fftw_real tmp97; + fftw_real tmp99; + ASSERT_ALIGNED_DOUBLE; + tmp98 = c_re(inout[34 * iostride]); + tmp100 = c_im(inout[34 * iostride]); + tmp97 = c_re(W[33]); + tmp99 = c_im(W[33]); + tmp101 = (tmp97 * tmp98) + (tmp99 * tmp100); + tmp465 = (tmp97 * tmp100) - (tmp99 * tmp98); + } + tmp102 = tmp96 + tmp101; + tmp819 = tmp464 + tmp465; + { + fftw_real tmp130; + fftw_real tmp458; + fftw_real tmp135; + fftw_real tmp459; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp127; + fftw_real tmp129; + fftw_real tmp126; + fftw_real tmp128; + ASSERT_ALIGNED_DOUBLE; + tmp127 = c_re(inout[58 * iostride]); + tmp129 = c_im(inout[58 * iostride]); + tmp126 = c_re(W[57]); + tmp128 = c_im(W[57]); + tmp130 = (tmp126 * tmp127) + (tmp128 * tmp129); + tmp458 = (tmp126 * 
tmp129) - (tmp128 * tmp127); + } + { + fftw_real tmp132; + fftw_real tmp134; + fftw_real tmp131; + fftw_real tmp133; + ASSERT_ALIGNED_DOUBLE; + tmp132 = c_re(inout[26 * iostride]); + tmp134 = c_im(inout[26 * iostride]); + tmp131 = c_re(W[25]); + tmp133 = c_im(W[25]); + tmp135 = (tmp131 * tmp132) + (tmp133 * tmp134); + tmp459 = (tmp131 * tmp134) - (tmp133 * tmp132); + } + tmp136 = tmp130 + tmp135; + tmp457 = tmp130 - tmp135; + tmp460 = tmp458 - tmp459; + tmp825 = tmp458 + tmp459; + } + { + fftw_real tmp104; + fftw_real tmp106; + fftw_real tmp103; + fftw_real tmp105; + ASSERT_ALIGNED_DOUBLE; + tmp104 = c_re(inout[18 * iostride]); + tmp106 = c_im(inout[18 * iostride]); + tmp103 = c_re(W[17]); + tmp105 = c_im(W[17]); + tmp107 = (tmp103 * tmp104) + (tmp105 * tmp106); + tmp448 = (tmp103 * tmp106) - (tmp105 * tmp104); + } + { + fftw_real tmp109; + fftw_real tmp111; + fftw_real tmp108; + fftw_real tmp110; + ASSERT_ALIGNED_DOUBLE; + tmp109 = c_re(inout[50 * iostride]); + tmp111 = c_im(inout[50 * iostride]); + tmp108 = c_re(W[49]); + tmp110 = c_im(W[49]); + tmp112 = (tmp108 * tmp109) + (tmp110 * tmp111); + tmp449 = (tmp108 * tmp111) - (tmp110 * tmp109); + } + tmp113 = tmp107 + tmp112; + tmp820 = tmp448 + tmp449; + { + fftw_real tmp119; + fftw_real tmp453; + fftw_real tmp124; + fftw_real tmp454; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp116; + fftw_real tmp118; + fftw_real tmp115; + fftw_real tmp117; + ASSERT_ALIGNED_DOUBLE; + tmp116 = c_re(inout[10 * iostride]); + tmp118 = c_im(inout[10 * iostride]); + tmp115 = c_re(W[9]); + tmp117 = c_im(W[9]); + tmp119 = (tmp115 * tmp116) + (tmp117 * tmp118); + tmp453 = (tmp115 * tmp118) - (tmp117 * tmp116); + } + { + fftw_real tmp121; + fftw_real tmp123; + fftw_real tmp120; + fftw_real tmp122; + ASSERT_ALIGNED_DOUBLE; + tmp121 = c_re(inout[42 * iostride]); + tmp123 = c_im(inout[42 * iostride]); + tmp120 = c_re(W[41]); + tmp122 = c_im(W[41]); + tmp124 = (tmp120 * tmp121) + (tmp122 * tmp123); + tmp454 = (tmp120 * tmp123) - (tmp122 * 
tmp121); + } + tmp125 = tmp119 + tmp124; + tmp452 = tmp119 - tmp124; + tmp455 = tmp453 - tmp454; + tmp826 = tmp453 + tmp454; + } + { + fftw_real tmp821; + fftw_real tmp822; + fftw_real tmp824; + fftw_real tmp827; + ASSERT_ALIGNED_DOUBLE; + tmp114 = tmp102 + tmp113; + tmp137 = tmp125 + tmp136; + tmp999 = tmp114 - tmp137; + tmp821 = tmp819 - tmp820; + tmp822 = tmp125 - tmp136; + tmp823 = tmp821 + tmp822; + tmp915 = tmp821 - tmp822; + tmp959 = tmp819 + tmp820; + tmp960 = tmp826 + tmp825; + tmp1000 = tmp959 - tmp960; + tmp824 = tmp102 - tmp113; + tmp827 = tmp825 - tmp826; + tmp828 = tmp824 + tmp827; + tmp916 = tmp824 - tmp827; + } + { + fftw_real tmp447; + fftw_real tmp450; + fftw_real tmp466; + fftw_real tmp467; + ASSERT_ALIGNED_DOUBLE; + tmp447 = tmp96 - tmp101; + tmp450 = tmp448 - tmp449; + tmp451 = tmp447 - tmp450; + tmp678 = tmp447 + tmp450; + tmp466 = tmp464 - tmp465; + tmp467 = tmp107 - tmp112; + tmp468 = tmp466 + tmp467; + tmp675 = tmp466 - tmp467; + } + { + fftw_real tmp469; + fftw_real tmp470; + fftw_real tmp456; + fftw_real tmp461; + ASSERT_ALIGNED_DOUBLE; + tmp469 = tmp452 + tmp455; + tmp470 = tmp460 - tmp457; + tmp471 = K707106781 * (tmp469 + tmp470); + tmp679 = K707106781 * (tmp470 - tmp469); + tmp456 = tmp452 - tmp455; + tmp461 = tmp457 + tmp460; + tmp462 = K707106781 * (tmp456 + tmp461); + tmp676 = K707106781 * (tmp456 - tmp461); + } + } + { + fftw_real tmp192; + fftw_real tmp517; + fftw_real tmp197; + fftw_real tmp518; + fftw_real tmp198; + fftw_real tmp849; + fftw_real tmp232; + fftw_real tmp486; + fftw_real tmp489; + fftw_real tmp833; + fftw_real tmp203; + fftw_real tmp477; + fftw_real tmp208; + fftw_real tmp478; + fftw_real tmp209; + fftw_real tmp850; + fftw_real tmp221; + fftw_real tmp481; + fftw_real tmp484; + fftw_real tmp834; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp189; + fftw_real tmp191; + fftw_real tmp188; + fftw_real tmp190; + ASSERT_ALIGNED_DOUBLE; + tmp189 = c_re(inout[iostride]); + tmp191 = c_im(inout[iostride]); + tmp188 = 
c_re(W[0]); + tmp190 = c_im(W[0]); + tmp192 = (tmp188 * tmp189) + (tmp190 * tmp191); + tmp517 = (tmp188 * tmp191) - (tmp190 * tmp189); + } + { + fftw_real tmp194; + fftw_real tmp196; + fftw_real tmp193; + fftw_real tmp195; + ASSERT_ALIGNED_DOUBLE; + tmp194 = c_re(inout[33 * iostride]); + tmp196 = c_im(inout[33 * iostride]); + tmp193 = c_re(W[32]); + tmp195 = c_im(W[32]); + tmp197 = (tmp193 * tmp194) + (tmp195 * tmp196); + tmp518 = (tmp193 * tmp196) - (tmp195 * tmp194); + } + tmp198 = tmp192 + tmp197; + tmp849 = tmp517 + tmp518; + { + fftw_real tmp226; + fftw_real tmp487; + fftw_real tmp231; + fftw_real tmp488; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp223; + fftw_real tmp225; + fftw_real tmp222; + fftw_real tmp224; + ASSERT_ALIGNED_DOUBLE; + tmp223 = c_re(inout[57 * iostride]); + tmp225 = c_im(inout[57 * iostride]); + tmp222 = c_re(W[56]); + tmp224 = c_im(W[56]); + tmp226 = (tmp222 * tmp223) + (tmp224 * tmp225); + tmp487 = (tmp222 * tmp225) - (tmp224 * tmp223); + } + { + fftw_real tmp228; + fftw_real tmp230; + fftw_real tmp227; + fftw_real tmp229; + ASSERT_ALIGNED_DOUBLE; + tmp228 = c_re(inout[25 * iostride]); + tmp230 = c_im(inout[25 * iostride]); + tmp227 = c_re(W[24]); + tmp229 = c_im(W[24]); + tmp231 = (tmp227 * tmp228) + (tmp229 * tmp230); + tmp488 = (tmp227 * tmp230) - (tmp229 * tmp228); + } + tmp232 = tmp226 + tmp231; + tmp486 = tmp226 - tmp231; + tmp489 = tmp487 - tmp488; + tmp833 = tmp487 + tmp488; + } + { + fftw_real tmp200; + fftw_real tmp202; + fftw_real tmp199; + fftw_real tmp201; + ASSERT_ALIGNED_DOUBLE; + tmp200 = c_re(inout[17 * iostride]); + tmp202 = c_im(inout[17 * iostride]); + tmp199 = c_re(W[16]); + tmp201 = c_im(W[16]); + tmp203 = (tmp199 * tmp200) + (tmp201 * tmp202); + tmp477 = (tmp199 * tmp202) - (tmp201 * tmp200); + } + { + fftw_real tmp205; + fftw_real tmp207; + fftw_real tmp204; + fftw_real tmp206; + ASSERT_ALIGNED_DOUBLE; + tmp205 = c_re(inout[49 * iostride]); + tmp207 = c_im(inout[49 * iostride]); + tmp204 = c_re(W[48]); + tmp206 = 
c_im(W[48]); + tmp208 = (tmp204 * tmp205) + (tmp206 * tmp207); + tmp478 = (tmp204 * tmp207) - (tmp206 * tmp205); + } + tmp209 = tmp203 + tmp208; + tmp850 = tmp477 + tmp478; + { + fftw_real tmp215; + fftw_real tmp482; + fftw_real tmp220; + fftw_real tmp483; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp212; + fftw_real tmp214; + fftw_real tmp211; + fftw_real tmp213; + ASSERT_ALIGNED_DOUBLE; + tmp212 = c_re(inout[9 * iostride]); + tmp214 = c_im(inout[9 * iostride]); + tmp211 = c_re(W[8]); + tmp213 = c_im(W[8]); + tmp215 = (tmp211 * tmp212) + (tmp213 * tmp214); + tmp482 = (tmp211 * tmp214) - (tmp213 * tmp212); + } + { + fftw_real tmp217; + fftw_real tmp219; + fftw_real tmp216; + fftw_real tmp218; + ASSERT_ALIGNED_DOUBLE; + tmp217 = c_re(inout[41 * iostride]); + tmp219 = c_im(inout[41 * iostride]); + tmp216 = c_re(W[40]); + tmp218 = c_im(W[40]); + tmp220 = (tmp216 * tmp217) + (tmp218 * tmp219); + tmp483 = (tmp216 * tmp219) - (tmp218 * tmp217); + } + tmp221 = tmp215 + tmp220; + tmp481 = tmp215 - tmp220; + tmp484 = tmp482 - tmp483; + tmp834 = tmp482 + tmp483; + } + { + fftw_real tmp210; + fftw_real tmp233; + fftw_real tmp832; + fftw_real tmp835; + ASSERT_ALIGNED_DOUBLE; + tmp210 = tmp198 + tmp209; + tmp233 = tmp221 + tmp232; + tmp234 = tmp210 + tmp233; + tmp1014 = tmp210 - tmp233; + tmp832 = tmp198 - tmp209; + tmp835 = tmp833 - tmp834; + tmp836 = tmp832 + tmp835; + tmp930 = tmp832 - tmp835; + } + { + fftw_real tmp965; + fftw_real tmp966; + fftw_real tmp851; + fftw_real tmp852; + ASSERT_ALIGNED_DOUBLE; + tmp965 = tmp849 + tmp850; + tmp966 = tmp834 + tmp833; + tmp967 = tmp965 + tmp966; + tmp1011 = tmp965 - tmp966; + tmp851 = tmp849 - tmp850; + tmp852 = tmp221 - tmp232; + tmp853 = tmp851 + tmp852; + tmp927 = tmp851 - tmp852; + } + { + fftw_real tmp476; + fftw_real tmp479; + fftw_real tmp519; + fftw_real tmp520; + ASSERT_ALIGNED_DOUBLE; + tmp476 = tmp192 - tmp197; + tmp479 = tmp477 - tmp478; + tmp480 = tmp476 - tmp479; + tmp684 = tmp476 + tmp479; + tmp519 = tmp517 - tmp518; + 
tmp520 = tmp203 - tmp208; + tmp521 = tmp519 + tmp520; + tmp695 = tmp519 - tmp520; + } + { + fftw_real tmp522; + fftw_real tmp523; + fftw_real tmp485; + fftw_real tmp490; + ASSERT_ALIGNED_DOUBLE; + tmp522 = tmp481 + tmp484; + tmp523 = tmp489 - tmp486; + tmp524 = K707106781 * (tmp522 + tmp523); + tmp685 = K707106781 * (tmp523 - tmp522); + tmp485 = tmp481 - tmp484; + tmp490 = tmp486 + tmp489; + tmp491 = K707106781 * (tmp485 + tmp490); + tmp696 = K707106781 * (tmp485 - tmp490); + } + } + { + fftw_real tmp245; + fftw_real tmp509; + fftw_real tmp506; + fftw_real tmp838; + fftw_real tmp279; + fftw_real tmp496; + fftw_real tmp501; + fftw_real tmp844; + fftw_real tmp256; + fftw_real tmp507; + fftw_real tmp512; + fftw_real tmp839; + fftw_real tmp268; + fftw_real tmp498; + fftw_real tmp495; + fftw_real tmp843; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp239; + fftw_real tmp504; + fftw_real tmp244; + fftw_real tmp505; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp236; + fftw_real tmp238; + fftw_real tmp235; + fftw_real tmp237; + ASSERT_ALIGNED_DOUBLE; + tmp236 = c_re(inout[5 * iostride]); + tmp238 = c_im(inout[5 * iostride]); + tmp235 = c_re(W[4]); + tmp237 = c_im(W[4]); + tmp239 = (tmp235 * tmp236) + (tmp237 * tmp238); + tmp504 = (tmp235 * tmp238) - (tmp237 * tmp236); + } + { + fftw_real tmp241; + fftw_real tmp243; + fftw_real tmp240; + fftw_real tmp242; + ASSERT_ALIGNED_DOUBLE; + tmp241 = c_re(inout[37 * iostride]); + tmp243 = c_im(inout[37 * iostride]); + tmp240 = c_re(W[36]); + tmp242 = c_im(W[36]); + tmp244 = (tmp240 * tmp241) + (tmp242 * tmp243); + tmp505 = (tmp240 * tmp243) - (tmp242 * tmp241); + } + tmp245 = tmp239 + tmp244; + tmp509 = tmp239 - tmp244; + tmp506 = tmp504 - tmp505; + tmp838 = tmp504 + tmp505; + } + { + fftw_real tmp273; + fftw_real tmp499; + fftw_real tmp278; + fftw_real tmp500; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp270; + fftw_real tmp272; + fftw_real tmp269; + fftw_real tmp271; + ASSERT_ALIGNED_DOUBLE; + tmp270 = c_re(inout[13 * iostride]); + 
tmp272 = c_im(inout[13 * iostride]); + tmp269 = c_re(W[12]); + tmp271 = c_im(W[12]); + tmp273 = (tmp269 * tmp270) + (tmp271 * tmp272); + tmp499 = (tmp269 * tmp272) - (tmp271 * tmp270); + } + { + fftw_real tmp275; + fftw_real tmp277; + fftw_real tmp274; + fftw_real tmp276; + ASSERT_ALIGNED_DOUBLE; + tmp275 = c_re(inout[45 * iostride]); + tmp277 = c_im(inout[45 * iostride]); + tmp274 = c_re(W[44]); + tmp276 = c_im(W[44]); + tmp278 = (tmp274 * tmp275) + (tmp276 * tmp277); + tmp500 = (tmp274 * tmp277) - (tmp276 * tmp275); + } + tmp279 = tmp273 + tmp278; + tmp496 = tmp273 - tmp278; + tmp501 = tmp499 - tmp500; + tmp844 = tmp499 + tmp500; + } + { + fftw_real tmp250; + fftw_real tmp510; + fftw_real tmp255; + fftw_real tmp511; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp247; + fftw_real tmp249; + fftw_real tmp246; + fftw_real tmp248; + ASSERT_ALIGNED_DOUBLE; + tmp247 = c_re(inout[21 * iostride]); + tmp249 = c_im(inout[21 * iostride]); + tmp246 = c_re(W[20]); + tmp248 = c_im(W[20]); + tmp250 = (tmp246 * tmp247) + (tmp248 * tmp249); + tmp510 = (tmp246 * tmp249) - (tmp248 * tmp247); + } + { + fftw_real tmp252; + fftw_real tmp254; + fftw_real tmp251; + fftw_real tmp253; + ASSERT_ALIGNED_DOUBLE; + tmp252 = c_re(inout[53 * iostride]); + tmp254 = c_im(inout[53 * iostride]); + tmp251 = c_re(W[52]); + tmp253 = c_im(W[52]); + tmp255 = (tmp251 * tmp252) + (tmp253 * tmp254); + tmp511 = (tmp251 * tmp254) - (tmp253 * tmp252); + } + tmp256 = tmp250 + tmp255; + tmp507 = tmp250 - tmp255; + tmp512 = tmp510 - tmp511; + tmp839 = tmp510 + tmp511; + } + { + fftw_real tmp262; + fftw_real tmp493; + fftw_real tmp267; + fftw_real tmp494; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp259; + fftw_real tmp261; + fftw_real tmp258; + fftw_real tmp260; + ASSERT_ALIGNED_DOUBLE; + tmp259 = c_re(inout[61 * iostride]); + tmp261 = c_im(inout[61 * iostride]); + tmp258 = c_re(W[60]); + tmp260 = c_im(W[60]); + tmp262 = (tmp258 * tmp259) + (tmp260 * tmp261); + tmp493 = (tmp258 * tmp261) - (tmp260 * tmp259); + } + { 
+ fftw_real tmp264; + fftw_real tmp266; + fftw_real tmp263; + fftw_real tmp265; + ASSERT_ALIGNED_DOUBLE; + tmp264 = c_re(inout[29 * iostride]); + tmp266 = c_im(inout[29 * iostride]); + tmp263 = c_re(W[28]); + tmp265 = c_im(W[28]); + tmp267 = (tmp263 * tmp264) + (tmp265 * tmp266); + tmp494 = (tmp263 * tmp266) - (tmp265 * tmp264); + } + tmp268 = tmp262 + tmp267; + tmp498 = tmp262 - tmp267; + tmp495 = tmp493 - tmp494; + tmp843 = tmp493 + tmp494; + } + { + fftw_real tmp257; + fftw_real tmp280; + fftw_real tmp837; + fftw_real tmp840; + ASSERT_ALIGNED_DOUBLE; + tmp257 = tmp245 + tmp256; + tmp280 = tmp268 + tmp279; + tmp281 = tmp257 + tmp280; + tmp1012 = tmp257 - tmp280; + tmp837 = tmp245 - tmp256; + tmp840 = tmp838 - tmp839; + tmp841 = tmp837 - tmp840; + tmp854 = tmp837 + tmp840; + } + { + fftw_real tmp968; + fftw_real tmp969; + fftw_real tmp842; + fftw_real tmp845; + ASSERT_ALIGNED_DOUBLE; + tmp968 = tmp838 + tmp839; + tmp969 = tmp843 + tmp844; + tmp970 = tmp968 + tmp969; + tmp1015 = tmp969 - tmp968; + tmp842 = tmp268 - tmp279; + tmp845 = tmp843 - tmp844; + tmp846 = tmp842 + tmp845; + tmp855 = tmp845 - tmp842; + } + { + fftw_real tmp497; + fftw_real tmp502; + fftw_real tmp687; + fftw_real tmp688; + ASSERT_ALIGNED_DOUBLE; + tmp497 = tmp495 + tmp496; + tmp502 = tmp498 - tmp501; + tmp503 = (K923879532 * tmp497) - (K382683432 * tmp502); + tmp527 = (K382683432 * tmp497) + (K923879532 * tmp502); + tmp687 = tmp495 - tmp496; + tmp688 = tmp498 + tmp501; + tmp689 = (K382683432 * tmp687) - (K923879532 * tmp688); + tmp699 = (K923879532 * tmp687) + (K382683432 * tmp688); + } + { + fftw_real tmp508; + fftw_real tmp513; + fftw_real tmp690; + fftw_real tmp691; + ASSERT_ALIGNED_DOUBLE; + tmp508 = tmp506 + tmp507; + tmp513 = tmp509 - tmp512; + tmp514 = (K923879532 * tmp508) + (K382683432 * tmp513); + tmp526 = (K923879532 * tmp513) - (K382683432 * tmp508); + tmp690 = tmp506 - tmp507; + tmp691 = tmp509 + tmp512; + tmp692 = (K382683432 * tmp690) + (K923879532 * tmp691); + tmp698 = 
(K382683432 * tmp691) - (K923879532 * tmp690); + } + } + { + fftw_real tmp91; + fftw_real tmp955; + fftw_real tmp990; + fftw_real tmp1039; + fftw_real tmp1049; + fftw_real tmp1055; + fftw_real tmp186; + fftw_real tmp1054; + fftw_real tmp972; + fftw_real tmp985; + fftw_real tmp378; + fftw_real tmp1051; + fftw_real tmp981; + fftw_real tmp984; + fftw_real tmp962; + fftw_real tmp1040; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp43; + fftw_real tmp90; + fftw_real tmp988; + fftw_real tmp989; + ASSERT_ALIGNED_DOUBLE; + tmp43 = tmp19 + tmp42; + tmp90 = tmp66 + tmp89; + tmp91 = tmp43 + tmp90; + tmp955 = tmp43 - tmp90; + tmp988 = tmp976 + tmp979; + tmp989 = tmp967 + tmp970; + tmp990 = tmp988 - tmp989; + tmp1039 = tmp989 + tmp988; + } + { + fftw_real tmp1041; + fftw_real tmp1048; + fftw_real tmp138; + fftw_real tmp185; + ASSERT_ALIGNED_DOUBLE; + tmp1041 = tmp993 + tmp992; + tmp1048 = tmp1042 + tmp1047; + tmp1049 = tmp1041 + tmp1048; + tmp1055 = tmp1048 - tmp1041; + tmp138 = tmp114 + tmp137; + tmp185 = tmp161 + tmp184; + tmp186 = tmp138 + tmp185; + tmp1054 = tmp138 - tmp185; + } + { + fftw_real tmp964; + fftw_real tmp971; + fftw_real tmp282; + fftw_real tmp377; + ASSERT_ALIGNED_DOUBLE; + tmp964 = tmp234 - tmp281; + tmp971 = tmp967 - tmp970; + tmp972 = tmp964 - tmp971; + tmp985 = tmp964 + tmp971; + tmp282 = tmp234 + tmp281; + tmp377 = tmp329 + tmp376; + tmp378 = tmp282 + tmp377; + tmp1051 = tmp282 - tmp377; + } + { + fftw_real tmp973; + fftw_real tmp980; + fftw_real tmp958; + fftw_real tmp961; + ASSERT_ALIGNED_DOUBLE; + tmp973 = tmp329 - tmp376; + tmp980 = tmp976 - tmp979; + tmp981 = tmp973 + tmp980; + tmp984 = tmp980 - tmp973; + tmp958 = tmp956 + tmp957; + tmp961 = tmp959 + tmp960; + tmp962 = tmp958 - tmp961; + tmp1040 = tmp961 + tmp958; + } + { + fftw_real tmp187; + fftw_real tmp987; + fftw_real tmp1050; + fftw_real tmp1052; + ASSERT_ALIGNED_DOUBLE; + tmp187 = tmp91 + tmp186; + c_re(inout[32 * iostride]) = tmp187 - tmp378; + c_re(inout[0]) = tmp187 + tmp378; + tmp987 = tmp91 
- tmp186; + c_re(inout[48 * iostride]) = tmp987 - tmp990; + c_re(inout[16 * iostride]) = tmp987 + tmp990; + { + fftw_real tmp963; + fftw_real tmp982; + fftw_real tmp1057; + fftw_real tmp1058; + ASSERT_ALIGNED_DOUBLE; + tmp963 = tmp955 + tmp962; + tmp982 = K707106781 * (tmp972 + tmp981); + c_re(inout[40 * iostride]) = tmp963 - tmp982; + c_re(inout[8 * iostride]) = tmp963 + tmp982; + tmp1057 = K707106781 * (tmp972 - tmp981); + tmp1058 = tmp1055 - tmp1054; + c_im(inout[24 * iostride]) = tmp1057 + tmp1058; + c_im(inout[56 * iostride]) = tmp1058 - tmp1057; + } + tmp1050 = tmp1040 + tmp1049; + c_im(inout[0]) = tmp1039 + tmp1050; + c_im(inout[32 * iostride]) = tmp1050 - tmp1039; + tmp1052 = tmp1049 - tmp1040; + c_im(inout[16 * iostride]) = tmp1051 + tmp1052; + c_im(inout[48 * iostride]) = tmp1052 - tmp1051; + { + fftw_real tmp1053; + fftw_real tmp1056; + fftw_real tmp983; + fftw_real tmp986; + ASSERT_ALIGNED_DOUBLE; + tmp1053 = K707106781 * (tmp985 + tmp984); + tmp1056 = tmp1054 + tmp1055; + c_im(inout[8 * iostride]) = tmp1053 + tmp1056; + c_im(inout[40 * iostride]) = tmp1056 - tmp1053; + tmp983 = tmp955 - tmp962; + tmp986 = K707106781 * (tmp984 - tmp985); + c_re(inout[56 * iostride]) = tmp983 - tmp986; + c_re(inout[24 * iostride]) = tmp983 + tmp986; + } + } + } + { + fftw_real tmp995; + fftw_real tmp1023; + fftw_real tmp1026; + fftw_real tmp1068; + fftw_real tmp1002; + fftw_real tmp1060; + fftw_real tmp1063; + fftw_real tmp1069; + fftw_real tmp1010; + fftw_real tmp1021; + fftw_real tmp1030; + fftw_real tmp1037; + fftw_real tmp1017; + fftw_real tmp1020; + fftw_real tmp1033; + fftw_real tmp1036; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp991; + fftw_real tmp994; + fftw_real tmp1024; + fftw_real tmp1025; + ASSERT_ALIGNED_DOUBLE; + tmp991 = tmp19 - tmp42; + tmp994 = tmp992 - tmp993; + tmp995 = tmp991 - tmp994; + tmp1023 = tmp991 + tmp994; + tmp1024 = tmp999 - tmp1000; + tmp1025 = tmp997 + tmp996; + tmp1026 = K707106781 * (tmp1024 + tmp1025); + tmp1068 = K707106781 * 
(tmp1024 - tmp1025); + } + { + fftw_real tmp998; + fftw_real tmp1001; + fftw_real tmp1061; + fftw_real tmp1062; + ASSERT_ALIGNED_DOUBLE; + tmp998 = tmp996 - tmp997; + tmp1001 = tmp999 + tmp1000; + tmp1002 = K707106781 * (tmp998 - tmp1001); + tmp1060 = K707106781 * (tmp1001 + tmp998); + tmp1061 = tmp66 - tmp89; + tmp1062 = tmp1047 - tmp1042; + tmp1063 = tmp1061 + tmp1062; + tmp1069 = tmp1062 - tmp1061; + } + { + fftw_real tmp1006; + fftw_real tmp1009; + fftw_real tmp1028; + fftw_real tmp1029; + ASSERT_ALIGNED_DOUBLE; + tmp1006 = tmp1004 - tmp1005; + tmp1009 = tmp1007 - tmp1008; + tmp1010 = + (K382683432 * tmp1006) - (K923879532 * tmp1009); + tmp1021 = + (K923879532 * tmp1006) + (K382683432 * tmp1009); + tmp1028 = tmp1004 + tmp1005; + tmp1029 = tmp1007 + tmp1008; + tmp1030 = + (K923879532 * tmp1028) - (K382683432 * tmp1029); + tmp1037 = + (K382683432 * tmp1028) + (K923879532 * tmp1029); + } + { + fftw_real tmp1013; + fftw_real tmp1016; + fftw_real tmp1031; + fftw_real tmp1032; + ASSERT_ALIGNED_DOUBLE; + tmp1013 = tmp1011 - tmp1012; + tmp1016 = tmp1014 - tmp1015; + tmp1017 = + (K382683432 * tmp1013) + (K923879532 * tmp1016); + tmp1020 = + (K382683432 * tmp1016) - (K923879532 * tmp1013); + tmp1031 = tmp1011 + tmp1012; + tmp1032 = tmp1014 + tmp1015; + tmp1033 = + (K923879532 * tmp1031) + (K382683432 * tmp1032); + tmp1036 = + (K923879532 * tmp1032) - (K382683432 * tmp1031); + } + { + fftw_real tmp1003; + fftw_real tmp1018; + fftw_real tmp1019; + fftw_real tmp1022; + ASSERT_ALIGNED_DOUBLE; + tmp1003 = tmp995 - tmp1002; + tmp1018 = tmp1010 - tmp1017; + c_re(inout[60 * iostride]) = tmp1003 - tmp1018; + c_re(inout[28 * iostride]) = tmp1003 + tmp1018; + tmp1019 = tmp995 + tmp1002; + tmp1022 = tmp1020 + tmp1021; + c_re(inout[44 * iostride]) = tmp1019 - tmp1022; + c_re(inout[12 * iostride]) = tmp1019 + tmp1022; + } + { + fftw_real tmp1071; + fftw_real tmp1072; + fftw_real tmp1067; + fftw_real tmp1070; + ASSERT_ALIGNED_DOUBLE; + tmp1071 = tmp1020 - tmp1021; + tmp1072 = tmp1069 - 
tmp1068; + c_im(inout[28 * iostride]) = tmp1071 + tmp1072; + c_im(inout[60 * iostride]) = tmp1072 - tmp1071; + tmp1067 = tmp1017 + tmp1010; + tmp1070 = tmp1068 + tmp1069; + c_im(inout[12 * iostride]) = tmp1067 + tmp1070; + c_im(inout[44 * iostride]) = tmp1070 - tmp1067; + } + { + fftw_real tmp1027; + fftw_real tmp1034; + fftw_real tmp1035; + fftw_real tmp1038; + ASSERT_ALIGNED_DOUBLE; + tmp1027 = tmp1023 - tmp1026; + tmp1034 = tmp1030 - tmp1033; + c_re(inout[52 * iostride]) = tmp1027 - tmp1034; + c_re(inout[20 * iostride]) = tmp1027 + tmp1034; + tmp1035 = tmp1023 + tmp1026; + tmp1038 = tmp1036 + tmp1037; + c_re(inout[36 * iostride]) = tmp1035 - tmp1038; + c_re(inout[4 * iostride]) = tmp1035 + tmp1038; + } + { + fftw_real tmp1065; + fftw_real tmp1066; + fftw_real tmp1059; + fftw_real tmp1064; + ASSERT_ALIGNED_DOUBLE; + tmp1065 = tmp1036 - tmp1037; + tmp1066 = tmp1063 - tmp1060; + c_im(inout[20 * iostride]) = tmp1065 + tmp1066; + c_im(inout[52 * iostride]) = tmp1066 - tmp1065; + tmp1059 = tmp1033 + tmp1030; + tmp1064 = tmp1060 + tmp1063; + c_im(inout[4 * iostride]) = tmp1059 + tmp1064; + c_im(inout[36 * iostride]) = tmp1064 - tmp1059; + } + } + { + fftw_real tmp419; + fftw_real tmp591; + fftw_real tmp1125; + fftw_real tmp1131; + fftw_real tmp474; + fftw_real tmp1122; + fftw_real tmp594; + fftw_real tmp1130; + fftw_real tmp530; + fftw_real tmp589; + fftw_real tmp598; + fftw_real tmp605; + fftw_real tmp585; + fftw_real tmp588; + fftw_real tmp601; + fftw_real tmp604; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp395; + fftw_real tmp418; + fftw_real tmp1123; + fftw_real tmp1124; + ASSERT_ALIGNED_DOUBLE; + tmp395 = tmp383 - tmp394; + tmp418 = tmp406 - tmp417; + tmp419 = tmp395 - tmp418; + tmp591 = tmp395 + tmp418; + tmp1123 = tmp608 - tmp609; + tmp1124 = tmp1109 - tmp1106; + tmp1125 = tmp1123 + tmp1124; + tmp1131 = tmp1124 - tmp1123; + } + { + fftw_real tmp446; + fftw_real tmp593; + fftw_real tmp473; + fftw_real tmp592; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp436; + 
fftw_real tmp445; + fftw_real tmp463; + fftw_real tmp472; + ASSERT_ALIGNED_DOUBLE; + tmp436 = tmp424 - tmp435; + tmp445 = tmp441 - tmp444; + tmp446 = + (K555570233 * tmp436) - (K831469612 * tmp445); + tmp593 = + (K555570233 * tmp445) + (K831469612 * tmp436); + tmp463 = tmp451 - tmp462; + tmp472 = tmp468 - tmp471; + tmp473 = + (K831469612 * tmp463) + (K555570233 * tmp472); + tmp592 = + (K555570233 * tmp463) - (K831469612 * tmp472); + } + tmp474 = tmp446 - tmp473; + tmp1122 = tmp473 + tmp446; + tmp594 = tmp592 + tmp593; + tmp1130 = tmp592 - tmp593; + } + { + fftw_real tmp516; + fftw_real tmp596; + fftw_real tmp529; + fftw_real tmp597; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp492; + fftw_real tmp515; + fftw_real tmp525; + fftw_real tmp528; + ASSERT_ALIGNED_DOUBLE; + tmp492 = tmp480 - tmp491; + tmp515 = tmp503 - tmp514; + tmp516 = tmp492 - tmp515; + tmp596 = tmp492 + tmp515; + tmp525 = tmp521 - tmp524; + tmp528 = tmp526 - tmp527; + tmp529 = tmp525 - tmp528; + tmp597 = tmp525 + tmp528; + } + tmp530 = (K290284677 * tmp516) - (K956940335 * tmp529); + tmp589 = (K956940335 * tmp516) + (K290284677 * tmp529); + tmp598 = (K881921264 * tmp596) - (K471396736 * tmp597); + tmp605 = (K471396736 * tmp596) + (K881921264 * tmp597); + } + { + fftw_real tmp571; + fftw_real tmp599; + fftw_real tmp584; + fftw_real tmp600; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp547; + fftw_real tmp570; + fftw_real tmp580; + fftw_real tmp583; + ASSERT_ALIGNED_DOUBLE; + tmp547 = tmp535 - tmp546; + tmp570 = tmp558 - tmp569; + tmp571 = tmp547 - tmp570; + tmp599 = tmp547 + tmp570; + tmp580 = tmp576 - tmp579; + tmp583 = tmp581 - tmp582; + tmp584 = tmp580 - tmp583; + tmp600 = tmp580 + tmp583; + } + tmp585 = (K290284677 * tmp571) + (K956940335 * tmp584); + tmp588 = (K290284677 * tmp584) - (K956940335 * tmp571); + tmp601 = (K881921264 * tmp599) + (K471396736 * tmp600); + tmp604 = (K881921264 * tmp600) - (K471396736 * tmp599); + } + { + fftw_real tmp475; + fftw_real tmp586; + fftw_real tmp587; + fftw_real 
tmp590; + ASSERT_ALIGNED_DOUBLE; + tmp475 = tmp419 + tmp474; + tmp586 = tmp530 + tmp585; + c_re(inout[45 * iostride]) = tmp475 - tmp586; + c_re(inout[13 * iostride]) = tmp475 + tmp586; + tmp587 = tmp419 - tmp474; + tmp590 = tmp588 - tmp589; + c_re(inout[61 * iostride]) = tmp587 - tmp590; + c_re(inout[29 * iostride]) = tmp587 + tmp590; + } + { + fftw_real tmp1129; + fftw_real tmp1132; + fftw_real tmp1133; + fftw_real tmp1134; + ASSERT_ALIGNED_DOUBLE; + tmp1129 = tmp589 + tmp588; + tmp1132 = tmp1130 + tmp1131; + c_im(inout[13 * iostride]) = tmp1129 + tmp1132; + c_im(inout[45 * iostride]) = tmp1132 - tmp1129; + tmp1133 = tmp530 - tmp585; + tmp1134 = tmp1131 - tmp1130; + c_im(inout[29 * iostride]) = tmp1133 + tmp1134; + c_im(inout[61 * iostride]) = tmp1134 - tmp1133; + } + { + fftw_real tmp595; + fftw_real tmp602; + fftw_real tmp603; + fftw_real tmp606; + ASSERT_ALIGNED_DOUBLE; + tmp595 = tmp591 + tmp594; + tmp602 = tmp598 + tmp601; + c_re(inout[37 * iostride]) = tmp595 - tmp602; + c_re(inout[5 * iostride]) = tmp595 + tmp602; + tmp603 = tmp591 - tmp594; + tmp606 = tmp604 - tmp605; + c_re(inout[53 * iostride]) = tmp603 - tmp606; + c_re(inout[21 * iostride]) = tmp603 + tmp606; + } + { + fftw_real tmp1121; + fftw_real tmp1126; + fftw_real tmp1127; + fftw_real tmp1128; + ASSERT_ALIGNED_DOUBLE; + tmp1121 = tmp605 + tmp604; + tmp1126 = tmp1122 + tmp1125; + c_im(inout[5 * iostride]) = tmp1121 + tmp1126; + c_im(inout[37 * iostride]) = tmp1126 - tmp1121; + tmp1127 = tmp598 - tmp601; + tmp1128 = tmp1125 - tmp1122; + c_im(inout[21 * iostride]) = tmp1127 + tmp1128; + c_im(inout[53 * iostride]) = tmp1128 - tmp1127; + } + } + { + fftw_real tmp611; + fftw_real tmp639; + fftw_real tmp1111; + fftw_real tmp1117; + fftw_real tmp618; + fftw_real tmp1104; + fftw_real tmp642; + fftw_real tmp1116; + fftw_real tmp626; + fftw_real tmp637; + fftw_real tmp646; + fftw_real tmp653; + fftw_real tmp633; + fftw_real tmp636; + fftw_real tmp649; + fftw_real tmp652; + ASSERT_ALIGNED_DOUBLE; + { + 
fftw_real tmp607; + fftw_real tmp610; + fftw_real tmp1105; + fftw_real tmp1110; + ASSERT_ALIGNED_DOUBLE; + tmp607 = tmp383 + tmp394; + tmp610 = tmp608 + tmp609; + tmp611 = tmp607 - tmp610; + tmp639 = tmp607 + tmp610; + tmp1105 = tmp417 + tmp406; + tmp1110 = tmp1106 + tmp1109; + tmp1111 = tmp1105 + tmp1110; + tmp1117 = tmp1110 - tmp1105; + } + { + fftw_real tmp614; + fftw_real tmp641; + fftw_real tmp617; + fftw_real tmp640; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp612; + fftw_real tmp613; + fftw_real tmp615; + fftw_real tmp616; + ASSERT_ALIGNED_DOUBLE; + tmp612 = tmp424 + tmp435; + tmp613 = tmp441 + tmp444; + tmp614 = + (K980785280 * tmp612) - (K195090322 * tmp613); + tmp641 = + (K980785280 * tmp613) + (K195090322 * tmp612); + tmp615 = tmp451 + tmp462; + tmp616 = tmp468 + tmp471; + tmp617 = + (K195090322 * tmp615) + (K980785280 * tmp616); + tmp640 = + (K980785280 * tmp615) - (K195090322 * tmp616); + } + tmp618 = tmp614 - tmp617; + tmp1104 = tmp617 + tmp614; + tmp642 = tmp640 + tmp641; + tmp1116 = tmp640 - tmp641; + } + { + fftw_real tmp622; + fftw_real tmp644; + fftw_real tmp625; + fftw_real tmp645; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp620; + fftw_real tmp621; + fftw_real tmp623; + fftw_real tmp624; + ASSERT_ALIGNED_DOUBLE; + tmp620 = tmp576 + tmp579; + tmp621 = tmp569 + tmp558; + tmp622 = tmp620 - tmp621; + tmp644 = tmp620 + tmp621; + tmp623 = tmp535 + tmp546; + tmp624 = tmp581 + tmp582; + tmp625 = tmp623 - tmp624; + tmp645 = tmp623 + tmp624; + } + tmp626 = (K634393284 * tmp622) - (K773010453 * tmp625); + tmp637 = (K773010453 * tmp622) + (K634393284 * tmp625); + tmp646 = (K995184726 * tmp644) - (K098017140 * tmp645); + tmp653 = (K098017140 * tmp644) + (K995184726 * tmp645); + } + { + fftw_real tmp629; + fftw_real tmp647; + fftw_real tmp632; + fftw_real tmp648; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp627; + fftw_real tmp628; + fftw_real tmp630; + fftw_real tmp631; + ASSERT_ALIGNED_DOUBLE; + tmp627 = tmp521 + tmp524; + tmp628 = tmp514 + tmp503; + tmp629 
= tmp627 - tmp628; + tmp647 = tmp627 + tmp628; + tmp630 = tmp480 + tmp491; + tmp631 = tmp526 + tmp527; + tmp632 = tmp630 - tmp631; + tmp648 = tmp630 + tmp631; + } + tmp633 = (K634393284 * tmp629) + (K773010453 * tmp632); + tmp636 = (K634393284 * tmp632) - (K773010453 * tmp629); + tmp649 = (K995184726 * tmp647) + (K098017140 * tmp648); + tmp652 = (K995184726 * tmp648) - (K098017140 * tmp647); + } + { + fftw_real tmp619; + fftw_real tmp634; + fftw_real tmp635; + fftw_real tmp638; + ASSERT_ALIGNED_DOUBLE; + tmp619 = tmp611 - tmp618; + tmp634 = tmp626 - tmp633; + c_re(inout[57 * iostride]) = tmp619 - tmp634; + c_re(inout[25 * iostride]) = tmp619 + tmp634; + tmp635 = tmp611 + tmp618; + tmp638 = tmp636 + tmp637; + c_re(inout[41 * iostride]) = tmp635 - tmp638; + c_re(inout[9 * iostride]) = tmp635 + tmp638; + } + { + fftw_real tmp1119; + fftw_real tmp1120; + fftw_real tmp1115; + fftw_real tmp1118; + ASSERT_ALIGNED_DOUBLE; + tmp1119 = tmp636 - tmp637; + tmp1120 = tmp1117 - tmp1116; + c_im(inout[25 * iostride]) = tmp1119 + tmp1120; + c_im(inout[57 * iostride]) = tmp1120 - tmp1119; + tmp1115 = tmp633 + tmp626; + tmp1118 = tmp1116 + tmp1117; + c_im(inout[9 * iostride]) = tmp1115 + tmp1118; + c_im(inout[41 * iostride]) = tmp1118 - tmp1115; + } + { + fftw_real tmp643; + fftw_real tmp650; + fftw_real tmp651; + fftw_real tmp654; + ASSERT_ALIGNED_DOUBLE; + tmp643 = tmp639 - tmp642; + tmp650 = tmp646 - tmp649; + c_re(inout[49 * iostride]) = tmp643 - tmp650; + c_re(inout[17 * iostride]) = tmp643 + tmp650; + tmp651 = tmp639 + tmp642; + tmp654 = tmp652 + tmp653; + c_re(inout[33 * iostride]) = tmp651 - tmp654; + c_re(inout[iostride]) = tmp651 + tmp654; + } + { + fftw_real tmp1113; + fftw_real tmp1114; + fftw_real tmp1103; + fftw_real tmp1112; + ASSERT_ALIGNED_DOUBLE; + tmp1113 = tmp652 - tmp653; + tmp1114 = tmp1111 - tmp1104; + c_im(inout[17 * iostride]) = tmp1113 + tmp1114; + c_im(inout[49 * iostride]) = tmp1114 - tmp1113; + tmp1103 = tmp649 + tmp646; + tmp1112 = tmp1104 + tmp1111; + 
c_im(inout[iostride]) = tmp1103 + tmp1112; + c_im(inout[33 * iostride]) = tmp1112 - tmp1103; + } + } + { + fftw_real tmp807; + fftw_real tmp891; + fftw_real tmp830; + fftw_real tmp1074; + fftw_real tmp1079; + fftw_real tmp1085; + fftw_real tmp894; + fftw_real tmp1084; + fftw_real tmp885; + fftw_real tmp888; + fftw_real tmp901; + fftw_real tmp904; + fftw_real tmp858; + fftw_real tmp889; + fftw_real tmp898; + fftw_real tmp905; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp795; + fftw_real tmp806; + fftw_real tmp892; + fftw_real tmp893; + ASSERT_ALIGNED_DOUBLE; + tmp795 = tmp791 + tmp794; + tmp806 = K707106781 * (tmp800 + tmp805); + tmp807 = tmp795 - tmp806; + tmp891 = tmp795 + tmp806; + { + fftw_real tmp818; + fftw_real tmp829; + fftw_real tmp1075; + fftw_real tmp1078; + ASSERT_ALIGNED_DOUBLE; + tmp818 = + (K923879532 * tmp812) - (K382683432 * tmp817); + tmp829 = + (K923879532 * tmp823) + (K382683432 * tmp828); + tmp830 = tmp818 - tmp829; + tmp1074 = tmp829 + tmp818; + tmp1075 = K707106781 * (tmp909 + tmp908); + tmp1078 = tmp1076 + tmp1077; + tmp1079 = tmp1075 + tmp1078; + tmp1085 = tmp1078 - tmp1075; + } + tmp892 = (K923879532 * tmp828) - (K382683432 * tmp823); + tmp893 = (K382683432 * tmp812) + (K923879532 * tmp817); + tmp894 = tmp892 + tmp893; + tmp1084 = tmp892 - tmp893; + { + fftw_real tmp875; + fftw_real tmp899; + fftw_real tmp884; + fftw_real tmp900; + fftw_real tmp874; + fftw_real tmp883; + ASSERT_ALIGNED_DOUBLE; + tmp874 = K707106781 * (tmp868 + tmp873); + tmp875 = tmp863 - tmp874; + tmp899 = tmp863 + tmp874; + tmp883 = K707106781 * (tmp881 + tmp882); + tmp884 = tmp880 - tmp883; + tmp900 = tmp880 + tmp883; + tmp885 = + (K555570233 * tmp875) + (K831469612 * tmp884); + tmp888 = + (K555570233 * tmp884) - (K831469612 * tmp875); + tmp901 = + (K980785280 * tmp899) + (K195090322 * tmp900); + tmp904 = + (K980785280 * tmp900) - (K195090322 * tmp899); + } + { + fftw_real tmp848; + fftw_real tmp896; + fftw_real tmp857; + fftw_real tmp897; + fftw_real tmp847; + fftw_real 
tmp856; + ASSERT_ALIGNED_DOUBLE; + tmp847 = K707106781 * (tmp841 + tmp846); + tmp848 = tmp836 - tmp847; + tmp896 = tmp836 + tmp847; + tmp856 = K707106781 * (tmp854 + tmp855); + tmp857 = tmp853 - tmp856; + tmp897 = tmp853 + tmp856; + tmp858 = + (K555570233 * tmp848) - (K831469612 * tmp857); + tmp889 = + (K831469612 * tmp848) + (K555570233 * tmp857); + tmp898 = + (K980785280 * tmp896) - (K195090322 * tmp897); + tmp905 = + (K195090322 * tmp896) + (K980785280 * tmp897); + } + } + { + fftw_real tmp831; + fftw_real tmp886; + fftw_real tmp887; + fftw_real tmp890; + ASSERT_ALIGNED_DOUBLE; + tmp831 = tmp807 + tmp830; + tmp886 = tmp858 + tmp885; + c_re(inout[42 * iostride]) = tmp831 - tmp886; + c_re(inout[10 * iostride]) = tmp831 + tmp886; + tmp887 = tmp807 - tmp830; + tmp890 = tmp888 - tmp889; + c_re(inout[58 * iostride]) = tmp887 - tmp890; + c_re(inout[26 * iostride]) = tmp887 + tmp890; + } + { + fftw_real tmp1083; + fftw_real tmp1086; + fftw_real tmp1087; + fftw_real tmp1088; + ASSERT_ALIGNED_DOUBLE; + tmp1083 = tmp889 + tmp888; + tmp1086 = tmp1084 + tmp1085; + c_im(inout[10 * iostride]) = tmp1083 + tmp1086; + c_im(inout[42 * iostride]) = tmp1086 - tmp1083; + tmp1087 = tmp858 - tmp885; + tmp1088 = tmp1085 - tmp1084; + c_im(inout[26 * iostride]) = tmp1087 + tmp1088; + c_im(inout[58 * iostride]) = tmp1088 - tmp1087; + } + { + fftw_real tmp895; + fftw_real tmp902; + fftw_real tmp903; + fftw_real tmp906; + ASSERT_ALIGNED_DOUBLE; + tmp895 = tmp891 + tmp894; + tmp902 = tmp898 + tmp901; + c_re(inout[34 * iostride]) = tmp895 - tmp902; + c_re(inout[2 * iostride]) = tmp895 + tmp902; + tmp903 = tmp891 - tmp894; + tmp906 = tmp904 - tmp905; + c_re(inout[50 * iostride]) = tmp903 - tmp906; + c_re(inout[18 * iostride]) = tmp903 + tmp906; + } + { + fftw_real tmp1073; + fftw_real tmp1080; + fftw_real tmp1081; + fftw_real tmp1082; + ASSERT_ALIGNED_DOUBLE; + tmp1073 = tmp905 + tmp904; + tmp1080 = tmp1074 + tmp1079; + c_im(inout[2 * iostride]) = tmp1073 + tmp1080; + c_im(inout[34 * iostride]) 
= tmp1080 - tmp1073; + tmp1081 = tmp898 - tmp901; + tmp1082 = tmp1079 - tmp1074; + c_im(inout[18 * iostride]) = tmp1081 + tmp1082; + c_im(inout[50 * iostride]) = tmp1082 - tmp1081; + } + } + { + fftw_real tmp911; + fftw_real tmp939; + fftw_real tmp918; + fftw_real tmp1090; + fftw_real tmp1093; + fftw_real tmp1099; + fftw_real tmp942; + fftw_real tmp1098; + fftw_real tmp933; + fftw_real tmp936; + fftw_real tmp949; + fftw_real tmp952; + fftw_real tmp926; + fftw_real tmp937; + fftw_real tmp946; + fftw_real tmp953; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp907; + fftw_real tmp910; + fftw_real tmp940; + fftw_real tmp941; + ASSERT_ALIGNED_DOUBLE; + tmp907 = tmp791 - tmp794; + tmp910 = K707106781 * (tmp908 - tmp909); + tmp911 = tmp907 - tmp910; + tmp939 = tmp907 + tmp910; + { + fftw_real tmp914; + fftw_real tmp917; + fftw_real tmp1091; + fftw_real tmp1092; + ASSERT_ALIGNED_DOUBLE; + tmp914 = + (K382683432 * tmp912) - (K923879532 * tmp913); + tmp917 = + (K382683432 * tmp915) + (K923879532 * tmp916); + tmp918 = tmp914 - tmp917; + tmp1090 = tmp917 + tmp914; + tmp1091 = K707106781 * (tmp800 - tmp805); + tmp1092 = tmp1077 - tmp1076; + tmp1093 = tmp1091 + tmp1092; + tmp1099 = tmp1092 - tmp1091; + } + tmp940 = (K382683432 * tmp916) - (K923879532 * tmp915); + tmp941 = (K923879532 * tmp912) + (K382683432 * tmp913); + tmp942 = tmp940 + tmp941; + tmp1098 = tmp940 - tmp941; + { + fftw_real tmp929; + fftw_real tmp947; + fftw_real tmp932; + fftw_real tmp948; + fftw_real tmp928; + fftw_real tmp931; + ASSERT_ALIGNED_DOUBLE; + tmp928 = K707106781 * (tmp841 - tmp846); + tmp929 = tmp927 - tmp928; + tmp947 = tmp927 + tmp928; + tmp931 = K707106781 * (tmp855 - tmp854); + tmp932 = tmp930 - tmp931; + tmp948 = tmp930 + tmp931; + tmp933 = + (K195090322 * tmp929) + (K980785280 * tmp932); + tmp936 = + (K195090322 * tmp932) - (K980785280 * tmp929); + tmp949 = + (K831469612 * tmp947) + (K555570233 * tmp948); + tmp952 = + (K831469612 * tmp948) - (K555570233 * tmp947); + } + { + fftw_real tmp922; + 
fftw_real tmp944; + fftw_real tmp925; + fftw_real tmp945; + fftw_real tmp921; + fftw_real tmp924; + ASSERT_ALIGNED_DOUBLE; + tmp921 = K707106781 * (tmp868 - tmp873); + tmp922 = tmp920 - tmp921; + tmp944 = tmp920 + tmp921; + tmp924 = K707106781 * (tmp882 - tmp881); + tmp925 = tmp923 - tmp924; + tmp945 = tmp923 + tmp924; + tmp926 = + (K195090322 * tmp922) - (K980785280 * tmp925); + tmp937 = + (K980785280 * tmp922) + (K195090322 * tmp925); + tmp946 = + (K831469612 * tmp944) - (K555570233 * tmp945); + tmp953 = + (K555570233 * tmp944) + (K831469612 * tmp945); + } + } + { + fftw_real tmp919; + fftw_real tmp934; + fftw_real tmp935; + fftw_real tmp938; + ASSERT_ALIGNED_DOUBLE; + tmp919 = tmp911 - tmp918; + tmp934 = tmp926 - tmp933; + c_re(inout[62 * iostride]) = tmp919 - tmp934; + c_re(inout[30 * iostride]) = tmp919 + tmp934; + tmp935 = tmp911 + tmp918; + tmp938 = tmp936 + tmp937; + c_re(inout[46 * iostride]) = tmp935 - tmp938; + c_re(inout[14 * iostride]) = tmp935 + tmp938; + } + { + fftw_real tmp1101; + fftw_real tmp1102; + fftw_real tmp1097; + fftw_real tmp1100; + ASSERT_ALIGNED_DOUBLE; + tmp1101 = tmp936 - tmp937; + tmp1102 = tmp1099 - tmp1098; + c_im(inout[30 * iostride]) = tmp1101 + tmp1102; + c_im(inout[62 * iostride]) = tmp1102 - tmp1101; + tmp1097 = tmp933 + tmp926; + tmp1100 = tmp1098 + tmp1099; + c_im(inout[14 * iostride]) = tmp1097 + tmp1100; + c_im(inout[46 * iostride]) = tmp1100 - tmp1097; + } + { + fftw_real tmp943; + fftw_real tmp950; + fftw_real tmp951; + fftw_real tmp954; + ASSERT_ALIGNED_DOUBLE; + tmp943 = tmp939 - tmp942; + tmp950 = tmp946 - tmp949; + c_re(inout[54 * iostride]) = tmp943 - tmp950; + c_re(inout[22 * iostride]) = tmp943 + tmp950; + tmp951 = tmp939 + tmp942; + tmp954 = tmp952 + tmp953; + c_re(inout[38 * iostride]) = tmp951 - tmp954; + c_re(inout[6 * iostride]) = tmp951 + tmp954; + } + { + fftw_real tmp1095; + fftw_real tmp1096; + fftw_real tmp1089; + fftw_real tmp1094; + ASSERT_ALIGNED_DOUBLE; + tmp1095 = tmp952 - tmp953; + tmp1096 = 
tmp1093 - tmp1090; + c_im(inout[22 * iostride]) = tmp1095 + tmp1096; + c_im(inout[54 * iostride]) = tmp1096 - tmp1095; + tmp1089 = tmp949 + tmp946; + tmp1094 = tmp1090 + tmp1093; + c_im(inout[6 * iostride]) = tmp1089 + tmp1094; + c_im(inout[38 * iostride]) = tmp1094 - tmp1089; + } + } + { + fftw_real tmp667; + fftw_real tmp727; + fftw_real tmp1155; + fftw_real tmp1161; + fftw_real tmp682; + fftw_real tmp1152; + fftw_real tmp730; + fftw_real tmp1160; + fftw_real tmp702; + fftw_real tmp725; + fftw_real tmp734; + fftw_real tmp741; + fftw_real tmp721; + fftw_real tmp724; + fftw_real tmp737; + fftw_real tmp740; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp659; + fftw_real tmp666; + fftw_real tmp1153; + fftw_real tmp1154; + ASSERT_ALIGNED_DOUBLE; + tmp659 = tmp655 - tmp658; + tmp666 = tmp662 - tmp665; + tmp667 = tmp659 - tmp666; + tmp727 = tmp659 + tmp666; + tmp1153 = tmp744 - tmp745; + tmp1154 = tmp1139 - tmp1138; + tmp1155 = tmp1153 + tmp1154; + tmp1161 = tmp1154 - tmp1153; + } + { + fftw_real tmp674; + fftw_real tmp729; + fftw_real tmp681; + fftw_real tmp728; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp670; + fftw_real tmp673; + fftw_real tmp677; + fftw_real tmp680; + ASSERT_ALIGNED_DOUBLE; + tmp670 = tmp668 - tmp669; + tmp673 = tmp671 - tmp672; + tmp674 = + (K195090322 * tmp670) - (K980785280 * tmp673); + tmp729 = + (K980785280 * tmp670) + (K195090322 * tmp673); + tmp677 = tmp675 - tmp676; + tmp680 = tmp678 - tmp679; + tmp681 = + (K195090322 * tmp677) + (K980785280 * tmp680); + tmp728 = + (K195090322 * tmp680) - (K980785280 * tmp677); + } + tmp682 = tmp674 - tmp681; + tmp1152 = tmp681 + tmp674; + tmp730 = tmp728 + tmp729; + tmp1160 = tmp728 - tmp729; + } + { + fftw_real tmp694; + fftw_real tmp732; + fftw_real tmp701; + fftw_real tmp733; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp686; + fftw_real tmp693; + fftw_real tmp697; + fftw_real tmp700; + ASSERT_ALIGNED_DOUBLE; + tmp686 = tmp684 - tmp685; + tmp693 = tmp689 - tmp692; + tmp694 = tmp686 - tmp693; + tmp732 = tmp686 
+ tmp693; + tmp697 = tmp695 - tmp696; + tmp700 = tmp698 - tmp699; + tmp701 = tmp697 - tmp700; + tmp733 = tmp697 + tmp700; + } + tmp702 = (K098017140 * tmp694) - (K995184726 * tmp701); + tmp725 = (K995184726 * tmp694) + (K098017140 * tmp701); + tmp734 = (K773010453 * tmp732) - (K634393284 * tmp733); + tmp741 = (K634393284 * tmp732) + (K773010453 * tmp733); + } + { + fftw_real tmp713; + fftw_real tmp735; + fftw_real tmp720; + fftw_real tmp736; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp705; + fftw_real tmp712; + fftw_real tmp716; + fftw_real tmp719; + ASSERT_ALIGNED_DOUBLE; + tmp705 = tmp703 - tmp704; + tmp712 = tmp708 - tmp711; + tmp713 = tmp705 - tmp712; + tmp735 = tmp705 + tmp712; + tmp716 = tmp714 - tmp715; + tmp719 = tmp717 - tmp718; + tmp720 = tmp716 - tmp719; + tmp736 = tmp716 + tmp719; + } + tmp721 = (K098017140 * tmp713) + (K995184726 * tmp720); + tmp724 = (K098017140 * tmp720) - (K995184726 * tmp713); + tmp737 = (K773010453 * tmp735) + (K634393284 * tmp736); + tmp740 = (K773010453 * tmp736) - (K634393284 * tmp735); + } + { + fftw_real tmp683; + fftw_real tmp722; + fftw_real tmp723; + fftw_real tmp726; + ASSERT_ALIGNED_DOUBLE; + tmp683 = tmp667 + tmp682; + tmp722 = tmp702 + tmp721; + c_re(inout[47 * iostride]) = tmp683 - tmp722; + c_re(inout[15 * iostride]) = tmp683 + tmp722; + tmp723 = tmp667 - tmp682; + tmp726 = tmp724 - tmp725; + c_re(inout[63 * iostride]) = tmp723 - tmp726; + c_re(inout[31 * iostride]) = tmp723 + tmp726; + } + { + fftw_real tmp1159; + fftw_real tmp1162; + fftw_real tmp1163; + fftw_real tmp1164; + ASSERT_ALIGNED_DOUBLE; + tmp1159 = tmp725 + tmp724; + tmp1162 = tmp1160 + tmp1161; + c_im(inout[15 * iostride]) = tmp1159 + tmp1162; + c_im(inout[47 * iostride]) = tmp1162 - tmp1159; + tmp1163 = tmp702 - tmp721; + tmp1164 = tmp1161 - tmp1160; + c_im(inout[31 * iostride]) = tmp1163 + tmp1164; + c_im(inout[63 * iostride]) = tmp1164 - tmp1163; + } + { + fftw_real tmp731; + fftw_real tmp738; + fftw_real tmp739; + fftw_real tmp742; + 
ASSERT_ALIGNED_DOUBLE; + tmp731 = tmp727 + tmp730; + tmp738 = tmp734 + tmp737; + c_re(inout[39 * iostride]) = tmp731 - tmp738; + c_re(inout[7 * iostride]) = tmp731 + tmp738; + tmp739 = tmp727 - tmp730; + tmp742 = tmp740 - tmp741; + c_re(inout[55 * iostride]) = tmp739 - tmp742; + c_re(inout[23 * iostride]) = tmp739 + tmp742; + } + { + fftw_real tmp1151; + fftw_real tmp1156; + fftw_real tmp1157; + fftw_real tmp1158; + ASSERT_ALIGNED_DOUBLE; + tmp1151 = tmp741 + tmp740; + tmp1156 = tmp1152 + tmp1155; + c_im(inout[7 * iostride]) = tmp1151 + tmp1156; + c_im(inout[39 * iostride]) = tmp1156 - tmp1151; + tmp1157 = tmp734 - tmp737; + tmp1158 = tmp1155 - tmp1152; + c_im(inout[23 * iostride]) = tmp1157 + tmp1158; + c_im(inout[55 * iostride]) = tmp1158 - tmp1157; + } + } + { + fftw_real tmp747; + fftw_real tmp775; + fftw_real tmp1141; + fftw_real tmp1147; + fftw_real tmp754; + fftw_real tmp1136; + fftw_real tmp778; + fftw_real tmp1146; + fftw_real tmp762; + fftw_real tmp773; + fftw_real tmp782; + fftw_real tmp789; + fftw_real tmp769; + fftw_real tmp772; + fftw_real tmp785; + fftw_real tmp788; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp743; + fftw_real tmp746; + fftw_real tmp1137; + fftw_real tmp1140; + ASSERT_ALIGNED_DOUBLE; + tmp743 = tmp655 + tmp658; + tmp746 = tmp744 + tmp745; + tmp747 = tmp743 - tmp746; + tmp775 = tmp743 + tmp746; + tmp1137 = tmp665 + tmp662; + tmp1140 = tmp1138 + tmp1139; + tmp1141 = tmp1137 + tmp1140; + tmp1147 = tmp1140 - tmp1137; + } + { + fftw_real tmp750; + fftw_real tmp777; + fftw_real tmp753; + fftw_real tmp776; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp748; + fftw_real tmp749; + fftw_real tmp751; + fftw_real tmp752; + ASSERT_ALIGNED_DOUBLE; + tmp748 = tmp668 + tmp669; + tmp749 = tmp671 + tmp672; + tmp750 = + (K831469612 * tmp748) - (K555570233 * tmp749); + tmp777 = + (K555570233 * tmp748) + (K831469612 * tmp749); + tmp751 = tmp675 + tmp676; + tmp752 = tmp678 + tmp679; + tmp753 = + (K831469612 * tmp751) + (K555570233 * tmp752); + tmp776 = + 
(K831469612 * tmp752) - (K555570233 * tmp751); + } + tmp754 = tmp750 - tmp753; + tmp1136 = tmp753 + tmp750; + tmp778 = tmp776 + tmp777; + tmp1146 = tmp776 - tmp777; + } + { + fftw_real tmp758; + fftw_real tmp780; + fftw_real tmp761; + fftw_real tmp781; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp756; + fftw_real tmp757; + fftw_real tmp759; + fftw_real tmp760; + ASSERT_ALIGNED_DOUBLE; + tmp756 = tmp714 + tmp715; + tmp757 = tmp711 + tmp708; + tmp758 = tmp756 - tmp757; + tmp780 = tmp756 + tmp757; + tmp759 = tmp703 + tmp704; + tmp760 = tmp717 + tmp718; + tmp761 = tmp759 - tmp760; + tmp781 = tmp759 + tmp760; + } + tmp762 = (K471396736 * tmp758) - (K881921264 * tmp761); + tmp773 = (K881921264 * tmp758) + (K471396736 * tmp761); + tmp782 = (K956940335 * tmp780) - (K290284677 * tmp781); + tmp789 = (K290284677 * tmp780) + (K956940335 * tmp781); + } + { + fftw_real tmp765; + fftw_real tmp783; + fftw_real tmp768; + fftw_real tmp784; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp763; + fftw_real tmp764; + fftw_real tmp766; + fftw_real tmp767; + ASSERT_ALIGNED_DOUBLE; + tmp763 = tmp695 + tmp696; + tmp764 = tmp692 + tmp689; + tmp765 = tmp763 - tmp764; + tmp783 = tmp763 + tmp764; + tmp766 = tmp684 + tmp685; + tmp767 = tmp698 + tmp699; + tmp768 = tmp766 - tmp767; + tmp784 = tmp766 + tmp767; + } + tmp769 = (K471396736 * tmp765) + (K881921264 * tmp768); + tmp772 = (K471396736 * tmp768) - (K881921264 * tmp765); + tmp785 = (K956940335 * tmp783) + (K290284677 * tmp784); + tmp788 = (K956940335 * tmp784) - (K290284677 * tmp783); + } + { + fftw_real tmp755; + fftw_real tmp770; + fftw_real tmp771; + fftw_real tmp774; + ASSERT_ALIGNED_DOUBLE; + tmp755 = tmp747 - tmp754; + tmp770 = tmp762 - tmp769; + c_re(inout[59 * iostride]) = tmp755 - tmp770; + c_re(inout[27 * iostride]) = tmp755 + tmp770; + tmp771 = tmp747 + tmp754; + tmp774 = tmp772 + tmp773; + c_re(inout[43 * iostride]) = tmp771 - tmp774; + c_re(inout[11 * iostride]) = tmp771 + tmp774; + } + { + fftw_real tmp1149; + fftw_real tmp1150; + 
fftw_real tmp1145; + fftw_real tmp1148; + ASSERT_ALIGNED_DOUBLE; + tmp1149 = tmp772 - tmp773; + tmp1150 = tmp1147 - tmp1146; + c_im(inout[27 * iostride]) = tmp1149 + tmp1150; + c_im(inout[59 * iostride]) = tmp1150 - tmp1149; + tmp1145 = tmp769 + tmp762; + tmp1148 = tmp1146 + tmp1147; + c_im(inout[11 * iostride]) = tmp1145 + tmp1148; + c_im(inout[43 * iostride]) = tmp1148 - tmp1145; + } + { + fftw_real tmp779; + fftw_real tmp786; + fftw_real tmp787; + fftw_real tmp790; + ASSERT_ALIGNED_DOUBLE; + tmp779 = tmp775 - tmp778; + tmp786 = tmp782 - tmp785; + c_re(inout[51 * iostride]) = tmp779 - tmp786; + c_re(inout[19 * iostride]) = tmp779 + tmp786; + tmp787 = tmp775 + tmp778; + tmp790 = tmp788 + tmp789; + c_re(inout[35 * iostride]) = tmp787 - tmp790; + c_re(inout[3 * iostride]) = tmp787 + tmp790; + } + { + fftw_real tmp1143; + fftw_real tmp1144; + fftw_real tmp1135; + fftw_real tmp1142; + ASSERT_ALIGNED_DOUBLE; + tmp1143 = tmp788 - tmp789; + tmp1144 = tmp1141 - tmp1136; + c_im(inout[19 * iostride]) = tmp1143 + tmp1144; + c_im(inout[51 * iostride]) = tmp1144 - tmp1143; + tmp1135 = tmp785 + tmp782; + tmp1142 = tmp1136 + tmp1141; + c_im(inout[3 * iostride]) = tmp1135 + tmp1142; + c_im(inout[35 * iostride]) = tmp1142 - tmp1135; + } + } + } +} + /* Descriptor that publishes fftwi_twiddle_64 under the name "fftwi_twiddle_64"; twiddle_order lists the 63 indices 1..63 in ascending order. NOTE(review): the field layout comes from fftw_codelet_desc in fftw-int.h (not visible here); the fields are presumably name, codelet pointer, size (64), direction, type, signature (1419), twiddle count (63) and twiddle order -- confirm against that header. */ +static const int twiddle_order[] = + { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, +20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, +44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63 }; +fftw_codelet_desc fftwi_twiddle_64_desc = { + "fftwi_twiddle_64", + (void (*)()) fftwi_twiddle_64, + 64, + FFTW_BACKWARD, + FFTW_TWIDDLE, + 1419, + 63, + twiddle_order, +}; diff --git a/src/fftw/ftwi_7.c b/src/fftw/ftwi_7.c new file mode 100644 index 0000000..ea3fbc3 --- /dev/null +++ b/src/fftw/ftwi_7.c @@ -0,0 +1,272 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can
redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Mon Mar 24 02:08:26 EST 2003 */ + +#include "fftw-int.h" +#include "fftw.h" + +/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -twiddleinv 7 */ + +/* + * This function contains 72 FP additions, 60 FP multiplications, + * (or, 60 additions, 48 multiplications, 12 fused multiply/add), + * 24 stack variables, and 28 memory accesses + */ +static const fftw_real K222520933 = +FFTW_KONST(+0.222520933956314404288902564496794759466355569); +static const fftw_real K900968867 = +FFTW_KONST(+0.900968867902419126236102319507445051165919162); +static const fftw_real K623489801 = +FFTW_KONST(+0.623489801858733530525004884004239810632274731); +static const fftw_real K433883739 = +FFTW_KONST(+0.433883739117558120475768332848358754609990728); +static const fftw_real K974927912 = +FFTW_KONST(+0.974927912181823607018131682993931217232785801); +static const fftw_real K781831482 = +FFTW_KONST(+0.781831482468029808708444526674057750232334519); + +/* + * Generator Id's : + * $Id: ftwi_7.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: ftwi_7.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: ftwi_7.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + */ + /* Generated inverse (genfft -twiddleinv) size-7 twiddle codelet, working in place on A. It runs m iterations; after each one inout advances by dist elements and W by 6 entries. Per iteration: element 0 is read unchanged, elements k = 1..6 at inout[k * iostride] are multiplied by the complex conjugate of W[k - 1], the pairs (1,6), (2,5) and (3,4) are reduced to sums and differences, and those are combined with the K* constants above (0.6234.., 0.2225.., 0.9009.. and 0.7818.., 0.9749.., 0.4338.. equal |cos| and sin of 2*pi*k/7 for k = 1, 2, 3). The seven results are stored back to inout[0], inout[iostride], ..., inout[6 * iostride]. Nothing is returned. */ +void
fftwi_twiddle_7(fftw_complex *A, const fftw_complex *W, int iostride, + int m, int dist) +{ + int i; + fftw_complex *inout; + inout = A; + for (i = m; i > 0; i = i - 1, inout = inout + dist, W = W + 6) { + fftw_real tmp1; + fftw_real tmp53; + fftw_real tmp12; + fftw_real tmp54; + fftw_real tmp38; + fftw_real tmp50; + fftw_real tmp23; + fftw_real tmp55; + fftw_real tmp44; + fftw_real tmp51; + fftw_real tmp34; + fftw_real tmp56; + fftw_real tmp41; + fftw_real tmp52; + ASSERT_ALIGNED_DOUBLE; + tmp1 = c_re(inout[0]); + tmp53 = c_im(inout[0]); /* element 0 is used as read; no W entry is applied to it */ + { + fftw_real tmp6; + fftw_real tmp37; + fftw_real tmp11; + fftw_real tmp36; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp3; + fftw_real tmp5; + fftw_real tmp2; + fftw_real tmp4; + ASSERT_ALIGNED_DOUBLE; + tmp3 = c_re(inout[iostride]); + tmp5 = c_im(inout[iostride]); + tmp2 = c_re(W[0]); + tmp4 = c_im(W[0]); + tmp6 = (tmp2 * tmp3) + (tmp4 * tmp5); + tmp37 = (tmp2 * tmp5) - (tmp4 * tmp3); /* (tmp6, tmp37) = real/imag parts of inout[iostride] times the conjugate of W[0] */ + } + { + fftw_real tmp8; + fftw_real tmp10; + fftw_real tmp7; + fftw_real tmp9; + ASSERT_ALIGNED_DOUBLE; + tmp8 = c_re(inout[6 * iostride]); + tmp10 = c_im(inout[6 * iostride]); + tmp7 = c_re(W[5]); + tmp9 = c_im(W[5]); + tmp11 = (tmp7 * tmp8) + (tmp9 * tmp10); + tmp36 = (tmp7 * tmp10) - (tmp9 * tmp8); + } + tmp12 = tmp6 + tmp11; + tmp54 = tmp6 - tmp11; + tmp38 = tmp36 - tmp37; + tmp50 = tmp37 + tmp36; /* pair (1,6): sums tmp12 (re) and tmp50 (im); differences tmp54 = re1 - re6 and tmp38 = im6 - im1 */ + } + { + fftw_real tmp17; + fftw_real tmp43; + fftw_real tmp22; + fftw_real tmp42; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp14; + fftw_real tmp16; + fftw_real tmp13; + fftw_real tmp15; + ASSERT_ALIGNED_DOUBLE; + tmp14 = c_re(inout[2 * iostride]); + tmp16 = c_im(inout[2 * iostride]); + tmp13 = c_re(W[1]); + tmp15 = c_im(W[1]); + tmp17 = (tmp13 * tmp14) + (tmp15 * tmp16); + tmp43 = (tmp13 * tmp16) - (tmp15 * tmp14); + } + { + fftw_real tmp19; + fftw_real tmp21; + fftw_real tmp18; + fftw_real tmp20; + ASSERT_ALIGNED_DOUBLE; + tmp19 = c_re(inout[5 * iostride]); + tmp21 = c_im(inout[5 * iostride]); + tmp18 = c_re(W[4]); + tmp20 = c_im(W[4]); +
tmp22 = (tmp18 * tmp19) + (tmp20 * tmp21); + tmp42 = (tmp18 * tmp21) - (tmp20 * tmp19); + } + tmp23 = tmp17 + tmp22; + tmp55 = tmp17 - tmp22; + tmp44 = tmp42 - tmp43; + tmp51 = tmp43 + tmp42; /* pair (2,5): same sum/difference pattern as pair (1,6) */ + } + { + fftw_real tmp28; + fftw_real tmp40; + fftw_real tmp33; + fftw_real tmp39; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp25; + fftw_real tmp27; + fftw_real tmp24; + fftw_real tmp26; + ASSERT_ALIGNED_DOUBLE; + tmp25 = c_re(inout[3 * iostride]); + tmp27 = c_im(inout[3 * iostride]); + tmp24 = c_re(W[2]); + tmp26 = c_im(W[2]); + tmp28 = (tmp24 * tmp25) + (tmp26 * tmp27); + tmp40 = (tmp24 * tmp27) - (tmp26 * tmp25); + } + { + fftw_real tmp30; + fftw_real tmp32; + fftw_real tmp29; + fftw_real tmp31; + ASSERT_ALIGNED_DOUBLE; + tmp30 = c_re(inout[4 * iostride]); + tmp32 = c_im(inout[4 * iostride]); + tmp29 = c_re(W[3]); + tmp31 = c_im(W[3]); + tmp33 = (tmp29 * tmp30) + (tmp31 * tmp32); + tmp39 = (tmp29 * tmp32) - (tmp31 * tmp30); + } + tmp34 = tmp28 + tmp33; + tmp56 = tmp28 - tmp33; + tmp41 = tmp39 - tmp40; + tmp52 = tmp40 + tmp39; /* pair (3,4): same sum/difference pattern as pair (1,6) */ + } + { + fftw_real tmp47; + fftw_real tmp46; + fftw_real tmp59; + fftw_real tmp60; + ASSERT_ALIGNED_DOUBLE; + c_re(inout[0]) = tmp1 + tmp12 + tmp23 + tmp34; /* output 0, real part: sum of the real parts of all seven elements */ + tmp47 = + (K781831482 * tmp38) + (K974927912 * tmp44) + + (K433883739 * tmp41); + tmp46 = + tmp1 + (K623489801 * tmp12) - (K900968867 * tmp34) - + (K222520933 * tmp23); + c_re(inout[6 * iostride]) = tmp46 - tmp47; + c_re(inout[iostride]) = tmp46 + tmp47; + { + fftw_real tmp49; + fftw_real tmp48; + fftw_real tmp45; + fftw_real tmp35; + ASSERT_ALIGNED_DOUBLE; + tmp49 = + (K433883739 * tmp38) + (K974927912 * tmp41) - + (K781831482 * tmp44); + tmp48 = + tmp1 + (K623489801 * tmp23) - + (K222520933 * tmp34) - (K900968867 * tmp12); + c_re(inout[4 * iostride]) = tmp48 - tmp49; + c_re(inout[3 * iostride]) = tmp48 + tmp49; + tmp45 = + (K974927912 * tmp38) - (K781831482 * tmp41) - + (K433883739 * tmp44); + tmp35 = + tmp1 + (K623489801 * tmp34) - + (K900968867 * tmp23) - (K222520933 * tmp12); +
c_re(inout[5 * iostride]) = tmp35 - tmp45; + c_re(inout[2 * iostride]) = tmp35 + tmp45; + } + c_im(inout[0]) = tmp50 + tmp51 + tmp52 + tmp53; /* output 0, imaginary part: sum of the imaginary parts of all seven elements */ + tmp59 = + (K974927912 * tmp54) - (K781831482 * tmp56) - + (K433883739 * tmp55); + tmp60 = + (K623489801 * tmp52) + tmp53 - (K900968867 * tmp51) - + (K222520933 * tmp50); + c_im(inout[2 * iostride]) = tmp59 + tmp60; + c_im(inout[5 * iostride]) = tmp60 - tmp59; + { + fftw_real tmp61; + fftw_real tmp62; + fftw_real tmp57; + fftw_real tmp58; + ASSERT_ALIGNED_DOUBLE; + tmp61 = + (K433883739 * tmp54) + (K974927912 * tmp56) - + (K781831482 * tmp55); + tmp62 = + (K623489801 * tmp51) + tmp53 - + (K222520933 * tmp52) - (K900968867 * tmp50); + c_im(inout[3 * iostride]) = tmp61 + tmp62; + c_im(inout[4 * iostride]) = tmp62 - tmp61; + tmp57 = + (K781831482 * tmp54) + (K974927912 * tmp55) + + (K433883739 * tmp56); + tmp58 = + (K623489801 * tmp50) + tmp53 - + (K900968867 * tmp52) - (K222520933 * tmp51); + c_im(inout[iostride]) = tmp57 + tmp58; + c_im(inout[6 * iostride]) = tmp58 - tmp57; + } + } + } +} + /* Descriptor that publishes fftwi_twiddle_7 under the name "fftwi_twiddle_7"; twiddle_order lists the six indices 1..6 in ascending order. NOTE(review): the field layout comes from fftw_codelet_desc in fftw-int.h (not visible here); the fields are presumably name, codelet pointer, size (7), direction, type, signature (165), twiddle count (6) and twiddle order -- confirm against that header. */ +static const int twiddle_order[] = { 1, 2, 3, 4, 5, 6 }; +fftw_codelet_desc fftwi_twiddle_7_desc = { + "fftwi_twiddle_7", + (void (*)()) fftwi_twiddle_7, + 7, + FFTW_BACKWARD, + FFTW_TWIDDLE, + 165, + 6, + twiddle_order, +}; diff --git a/src/fftw/ftwi_8.c b/src/fftw/ftwi_8.c new file mode 100644 index 0000000..ed8720a --- /dev/null +++ b/src/fftw/ftwi_8.c @@ -0,0 +1,285 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Mon Mar 24 02:08:32 EST 2003 */ + +#include "fftw-int.h" +#include "fftw.h" + +/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -twiddleinv 8 */ + +/* + * This function contains 66 FP additions, 32 FP multiplications, + * (or, 52 additions, 18 multiplications, 14 fused multiply/add), + * 28 stack variables, and 32 memory accesses + */ +static const fftw_real K707106781 = +FFTW_KONST(+0.707106781186547524400844362104849039284835938); + +/* + * Generator Id's : + * $Id: ftwi_8.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: ftwi_8.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: ftwi_8.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + */ + +void fftwi_twiddle_8(fftw_complex *A, const fftw_complex *W, int iostride, + int m, int dist) +{ + int i; + fftw_complex *inout; + inout = A; + for (i = m; i > 0; i = i - 1, inout = inout + dist, W = W + 7) { + fftw_real tmp7; + fftw_real tmp43; + fftw_real tmp71; + fftw_real tmp77; + fftw_real tmp41; + fftw_real tmp53; + fftw_real tmp56; + fftw_real tmp64; + fftw_real tmp18; + fftw_real tmp76; + fftw_real tmp46; + fftw_real tmp68; + fftw_real tmp30; + fftw_real tmp48; + fftw_real tmp51; + fftw_real tmp65; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp1; + fftw_real tmp70; + fftw_real tmp6; + fftw_real tmp69; + ASSERT_ALIGNED_DOUBLE; + tmp1 = c_re(inout[0]); + tmp70 = c_im(inout[0]); + { + fftw_real tmp3; + fftw_real tmp5; + fftw_real tmp2; + fftw_real tmp4; + ASSERT_ALIGNED_DOUBLE; + tmp3 = c_re(inout[4 * iostride]); + tmp5 = c_im(inout[4 * iostride]); + tmp2 = c_re(W[3]); + tmp4 = c_im(W[3]); + tmp6 = 
(tmp2 * tmp3) + (tmp4 * tmp5); + tmp69 = (tmp2 * tmp5) - (tmp4 * tmp3); + } + tmp7 = tmp1 + tmp6; + tmp43 = tmp1 - tmp6; + tmp71 = tmp69 + tmp70; + tmp77 = tmp70 - tmp69; + } + { + fftw_real tmp35; + fftw_real tmp54; + fftw_real tmp40; + fftw_real tmp55; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp32; + fftw_real tmp34; + fftw_real tmp31; + fftw_real tmp33; + ASSERT_ALIGNED_DOUBLE; + tmp32 = c_re(inout[7 * iostride]); + tmp34 = c_im(inout[7 * iostride]); + tmp31 = c_re(W[6]); + tmp33 = c_im(W[6]); + tmp35 = (tmp31 * tmp32) + (tmp33 * tmp34); + tmp54 = (tmp31 * tmp34) - (tmp33 * tmp32); + } + { + fftw_real tmp37; + fftw_real tmp39; + fftw_real tmp36; + fftw_real tmp38; + ASSERT_ALIGNED_DOUBLE; + tmp37 = c_re(inout[3 * iostride]); + tmp39 = c_im(inout[3 * iostride]); + tmp36 = c_re(W[2]); + tmp38 = c_im(W[2]); + tmp40 = (tmp36 * tmp37) + (tmp38 * tmp39); + tmp55 = (tmp36 * tmp39) - (tmp38 * tmp37); + } + tmp41 = tmp35 + tmp40; + tmp53 = tmp35 - tmp40; + tmp56 = tmp54 - tmp55; + tmp64 = tmp54 + tmp55; + } + { + fftw_real tmp12; + fftw_real tmp44; + fftw_real tmp17; + fftw_real tmp45; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp9; + fftw_real tmp11; + fftw_real tmp8; + fftw_real tmp10; + ASSERT_ALIGNED_DOUBLE; + tmp9 = c_re(inout[2 * iostride]); + tmp11 = c_im(inout[2 * iostride]); + tmp8 = c_re(W[1]); + tmp10 = c_im(W[1]); + tmp12 = (tmp8 * tmp9) + (tmp10 * tmp11); + tmp44 = (tmp8 * tmp11) - (tmp10 * tmp9); + } + { + fftw_real tmp14; + fftw_real tmp16; + fftw_real tmp13; + fftw_real tmp15; + ASSERT_ALIGNED_DOUBLE; + tmp14 = c_re(inout[6 * iostride]); + tmp16 = c_im(inout[6 * iostride]); + tmp13 = c_re(W[5]); + tmp15 = c_im(W[5]); + tmp17 = (tmp13 * tmp14) + (tmp15 * tmp16); + tmp45 = (tmp13 * tmp16) - (tmp15 * tmp14); + } + tmp18 = tmp12 + tmp17; + tmp76 = tmp12 - tmp17; + tmp46 = tmp44 - tmp45; + tmp68 = tmp44 + tmp45; + } + { + fftw_real tmp24; + fftw_real tmp49; + fftw_real tmp29; + fftw_real tmp50; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp21; + fftw_real tmp23; 
+ fftw_real tmp20; + fftw_real tmp22; + ASSERT_ALIGNED_DOUBLE; + tmp21 = c_re(inout[iostride]); + tmp23 = c_im(inout[iostride]); + tmp20 = c_re(W[0]); + tmp22 = c_im(W[0]); + tmp24 = (tmp20 * tmp21) + (tmp22 * tmp23); + tmp49 = (tmp20 * tmp23) - (tmp22 * tmp21); + } + { + fftw_real tmp26; + fftw_real tmp28; + fftw_real tmp25; + fftw_real tmp27; + ASSERT_ALIGNED_DOUBLE; + tmp26 = c_re(inout[5 * iostride]); + tmp28 = c_im(inout[5 * iostride]); + tmp25 = c_re(W[4]); + tmp27 = c_im(W[4]); + tmp29 = (tmp25 * tmp26) + (tmp27 * tmp28); + tmp50 = (tmp25 * tmp28) - (tmp27 * tmp26); + } + tmp30 = tmp24 + tmp29; + tmp48 = tmp24 - tmp29; + tmp51 = tmp49 - tmp50; + tmp65 = tmp49 + tmp50; + } + { + fftw_real tmp19; + fftw_real tmp42; + fftw_real tmp63; + fftw_real tmp66; + ASSERT_ALIGNED_DOUBLE; + tmp19 = tmp7 + tmp18; + tmp42 = tmp30 + tmp41; + c_re(inout[4 * iostride]) = tmp19 - tmp42; + c_re(inout[0]) = tmp19 + tmp42; + { + fftw_real tmp73; + fftw_real tmp74; + fftw_real tmp67; + fftw_real tmp72; + ASSERT_ALIGNED_DOUBLE; + tmp73 = tmp30 - tmp41; + tmp74 = tmp71 - tmp68; + c_im(inout[2 * iostride]) = tmp73 + tmp74; + c_im(inout[6 * iostride]) = tmp74 - tmp73; + tmp67 = tmp65 + tmp64; + tmp72 = tmp68 + tmp71; + c_im(inout[0]) = tmp67 + tmp72; + c_im(inout[4 * iostride]) = tmp72 - tmp67; + } + tmp63 = tmp7 - tmp18; + tmp66 = tmp64 - tmp65; + c_re(inout[6 * iostride]) = tmp63 - tmp66; + c_re(inout[2 * iostride]) = tmp63 + tmp66; + { + fftw_real tmp59; + fftw_real tmp78; + fftw_real tmp62; + fftw_real tmp75; + fftw_real tmp60; + fftw_real tmp61; + ASSERT_ALIGNED_DOUBLE; + tmp59 = tmp43 + tmp46; + tmp78 = tmp76 + tmp77; + tmp60 = tmp56 - tmp53; + tmp61 = tmp48 + tmp51; + tmp62 = K707106781 * (tmp60 - tmp61); + tmp75 = K707106781 * (tmp61 + tmp60); + c_re(inout[7 * iostride]) = tmp59 - tmp62; + c_re(inout[3 * iostride]) = tmp59 + tmp62; + c_im(inout[iostride]) = tmp75 + tmp78; + c_im(inout[5 * iostride]) = tmp78 - tmp75; + } + { + fftw_real tmp47; + fftw_real tmp80; + fftw_real 
tmp58; + fftw_real tmp79; + fftw_real tmp52; + fftw_real tmp57; + ASSERT_ALIGNED_DOUBLE; + tmp47 = tmp43 - tmp46; + tmp80 = tmp77 - tmp76; + tmp52 = tmp48 - tmp51; + tmp57 = tmp53 + tmp56; + tmp58 = K707106781 * (tmp52 + tmp57); + tmp79 = K707106781 * (tmp52 - tmp57); + c_re(inout[5 * iostride]) = tmp47 - tmp58; + c_re(inout[iostride]) = tmp47 + tmp58; + c_im(inout[3 * iostride]) = tmp79 + tmp80; + c_im(inout[7 * iostride]) = tmp80 - tmp79; + } + } + } +} + +static const int twiddle_order[] = { 1, 2, 3, 4, 5, 6, 7 }; +fftw_codelet_desc fftwi_twiddle_8_desc = { + "fftwi_twiddle_8", + (void (*)()) fftwi_twiddle_8, + 8, + FFTW_BACKWARD, + FFTW_TWIDDLE, + 187, + 7, + twiddle_order, +}; diff --git a/src/fftw/ftwi_9.c b/src/fftw/ftwi_9.c new file mode 100644 index 0000000..a972cd3 --- /dev/null +++ b/src/fftw/ftwi_9.c @@ -0,0 +1,377 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Mon Mar 24 02:08:32 EST 2003 */ + +#include "fftw-int.h" +#include "fftw.h" + +/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -twiddleinv 9 */ + +/* + * This function contains 96 FP additions, 72 FP multiplications, + * (or, 60 additions, 36 multiplications, 36 fused multiply/add), + * 34 stack variables, and 36 memory accesses + */ +static const fftw_real K642787609 = +FFTW_KONST(+0.642787609686539326322643409907263432907559884); +static const fftw_real K766044443 = +FFTW_KONST(+0.766044443118978035202392650555416673935832457); +static const fftw_real K939692620 = +FFTW_KONST(+0.939692620785908384054109277324731469936208134); +static const fftw_real K342020143 = +FFTW_KONST(+0.342020143325668733044099614682259580763083368); +static const fftw_real K984807753 = +FFTW_KONST(+0.984807753012208059366743024589523013670643252); +static const fftw_real K173648177 = +FFTW_KONST(+0.173648177666930348851716626769314796000375677); +static const fftw_real K500000000 = +FFTW_KONST(+0.500000000000000000000000000000000000000000000); +static const fftw_real K866025403 = +FFTW_KONST(+0.866025403784438646763723170752936183471402627); + +/* + * Generator Id's : + * $Id: ftwi_9.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: ftwi_9.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + * $Id: ftwi_9.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ + */ + +void fftwi_twiddle_9(fftw_complex *A, const fftw_complex *W, int iostride, + int m, int dist) +{ + int i; + fftw_complex *inout; + inout = A; + for (i = m; i > 0; i = i - 1, inout = inout + dist, W = W + 8) { + fftw_real tmp1; + fftw_real tmp99; + fftw_real tmp64; + 
fftw_real tmp98; + fftw_real tmp105; + fftw_real tmp104; + fftw_real tmp12; + fftw_real tmp61; + fftw_real tmp47; + fftw_real tmp78; + fftw_real tmp89; + fftw_real tmp54; + fftw_real tmp75; + fftw_real tmp90; + fftw_real tmp30; + fftw_real tmp68; + fftw_real tmp86; + fftw_real tmp59; + fftw_real tmp71; + fftw_real tmp87; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp6; + fftw_real tmp63; + fftw_real tmp11; + fftw_real tmp62; + ASSERT_ALIGNED_DOUBLE; + tmp1 = c_re(inout[0]); + tmp99 = c_im(inout[0]); + { + fftw_real tmp3; + fftw_real tmp5; + fftw_real tmp2; + fftw_real tmp4; + ASSERT_ALIGNED_DOUBLE; + tmp3 = c_re(inout[3 * iostride]); + tmp5 = c_im(inout[3 * iostride]); + tmp2 = c_re(W[2]); + tmp4 = c_im(W[2]); + tmp6 = (tmp2 * tmp3) + (tmp4 * tmp5); + tmp63 = (tmp2 * tmp5) - (tmp4 * tmp3); + } + { + fftw_real tmp8; + fftw_real tmp10; + fftw_real tmp7; + fftw_real tmp9; + ASSERT_ALIGNED_DOUBLE; + tmp8 = c_re(inout[6 * iostride]); + tmp10 = c_im(inout[6 * iostride]); + tmp7 = c_re(W[5]); + tmp9 = c_im(W[5]); + tmp11 = (tmp7 * tmp8) + (tmp9 * tmp10); + tmp62 = (tmp7 * tmp10) - (tmp9 * tmp8); + } + tmp64 = K866025403 * (tmp62 - tmp63); + tmp98 = tmp63 + tmp62; + tmp105 = tmp99 - (K500000000 * tmp98); + tmp104 = K866025403 * (tmp6 - tmp11); + tmp12 = tmp6 + tmp11; + tmp61 = tmp1 - (K500000000 * tmp12); + } + { + fftw_real tmp35; + fftw_real tmp50; + fftw_real tmp40; + fftw_real tmp51; + fftw_real tmp45; + fftw_real tmp52; + fftw_real tmp46; + fftw_real tmp53; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp32; + fftw_real tmp34; + fftw_real tmp31; + fftw_real tmp33; + ASSERT_ALIGNED_DOUBLE; + tmp32 = c_re(inout[2 * iostride]); + tmp34 = c_im(inout[2 * iostride]); + tmp31 = c_re(W[1]); + tmp33 = c_im(W[1]); + tmp35 = (tmp31 * tmp32) + (tmp33 * tmp34); + tmp50 = (tmp31 * tmp34) - (tmp33 * tmp32); + } + { + fftw_real tmp37; + fftw_real tmp39; + fftw_real tmp36; + fftw_real tmp38; + ASSERT_ALIGNED_DOUBLE; + tmp37 = c_re(inout[5 * iostride]); + tmp39 = c_im(inout[5 * iostride]); + 
tmp36 = c_re(W[4]); + tmp38 = c_im(W[4]); + tmp40 = (tmp36 * tmp37) + (tmp38 * tmp39); + tmp51 = (tmp36 * tmp39) - (tmp38 * tmp37); + } + { + fftw_real tmp42; + fftw_real tmp44; + fftw_real tmp41; + fftw_real tmp43; + ASSERT_ALIGNED_DOUBLE; + tmp42 = c_re(inout[8 * iostride]); + tmp44 = c_im(inout[8 * iostride]); + tmp41 = c_re(W[7]); + tmp43 = c_im(W[7]); + tmp45 = (tmp41 * tmp42) + (tmp43 * tmp44); + tmp52 = (tmp41 * tmp44) - (tmp43 * tmp42); + } + tmp46 = tmp40 + tmp45; + tmp53 = tmp51 + tmp52; + { + fftw_real tmp76; + fftw_real tmp77; + fftw_real tmp73; + fftw_real tmp74; + ASSERT_ALIGNED_DOUBLE; + tmp47 = tmp35 + tmp46; + tmp76 = tmp35 - (K500000000 * tmp46); + tmp77 = K866025403 * (tmp52 - tmp51); + tmp78 = tmp76 - tmp77; + tmp89 = tmp76 + tmp77; + tmp54 = tmp50 + tmp53; + tmp73 = tmp50 - (K500000000 * tmp53); + tmp74 = K866025403 * (tmp40 - tmp45); + tmp75 = tmp73 - tmp74; + tmp90 = tmp74 + tmp73; + } + } + { + fftw_real tmp18; + fftw_real tmp55; + fftw_real tmp23; + fftw_real tmp56; + fftw_real tmp28; + fftw_real tmp57; + fftw_real tmp29; + fftw_real tmp58; + ASSERT_ALIGNED_DOUBLE; + { + fftw_real tmp15; + fftw_real tmp17; + fftw_real tmp14; + fftw_real tmp16; + ASSERT_ALIGNED_DOUBLE; + tmp15 = c_re(inout[iostride]); + tmp17 = c_im(inout[iostride]); + tmp14 = c_re(W[0]); + tmp16 = c_im(W[0]); + tmp18 = (tmp14 * tmp15) + (tmp16 * tmp17); + tmp55 = (tmp14 * tmp17) - (tmp16 * tmp15); + } + { + fftw_real tmp20; + fftw_real tmp22; + fftw_real tmp19; + fftw_real tmp21; + ASSERT_ALIGNED_DOUBLE; + tmp20 = c_re(inout[4 * iostride]); + tmp22 = c_im(inout[4 * iostride]); + tmp19 = c_re(W[3]); + tmp21 = c_im(W[3]); + tmp23 = (tmp19 * tmp20) + (tmp21 * tmp22); + tmp56 = (tmp19 * tmp22) - (tmp21 * tmp20); + } + { + fftw_real tmp25; + fftw_real tmp27; + fftw_real tmp24; + fftw_real tmp26; + ASSERT_ALIGNED_DOUBLE; + tmp25 = c_re(inout[7 * iostride]); + tmp27 = c_im(inout[7 * iostride]); + tmp24 = c_re(W[6]); + tmp26 = c_im(W[6]); + tmp28 = (tmp24 * tmp25) + (tmp26 * 
tmp27); + tmp57 = (tmp24 * tmp27) - (tmp26 * tmp25); + } + tmp29 = tmp23 + tmp28; + tmp58 = tmp56 + tmp57; + { + fftw_real tmp66; + fftw_real tmp67; + fftw_real tmp69; + fftw_real tmp70; + ASSERT_ALIGNED_DOUBLE; + tmp30 = tmp18 + tmp29; + tmp66 = tmp18 - (K500000000 * tmp29); + tmp67 = K866025403 * (tmp57 - tmp56); + tmp68 = tmp66 - tmp67; + tmp86 = tmp66 + tmp67; + tmp59 = tmp55 + tmp58; + tmp69 = tmp55 - (K500000000 * tmp58); + tmp70 = K866025403 * (tmp23 - tmp28); + tmp71 = tmp69 - tmp70; + tmp87 = tmp70 + tmp69; + } + } + { + fftw_real tmp60; + fftw_real tmp13; + fftw_real tmp48; + fftw_real tmp49; + ASSERT_ALIGNED_DOUBLE; + tmp60 = K866025403 * (tmp54 - tmp59); + tmp13 = tmp1 + tmp12; + tmp48 = tmp30 + tmp47; + tmp49 = tmp13 - (K500000000 * tmp48); + c_re(inout[0]) = tmp13 + tmp48; + c_re(inout[3 * iostride]) = tmp49 + tmp60; + c_re(inout[6 * iostride]) = tmp49 - tmp60; + } + { + fftw_real tmp101; + fftw_real tmp97; + fftw_real tmp100; + fftw_real tmp102; + ASSERT_ALIGNED_DOUBLE; + tmp101 = K866025403 * (tmp30 - tmp47); + tmp97 = tmp59 + tmp54; + tmp100 = tmp98 + tmp99; + tmp102 = tmp100 - (K500000000 * tmp97); + c_im(inout[0]) = tmp97 + tmp100; + c_im(inout[6 * iostride]) = tmp102 - tmp101; + c_im(inout[3 * iostride]) = tmp101 + tmp102; + } + { + fftw_real tmp65; + fftw_real tmp110; + fftw_real tmp80; + fftw_real tmp111; + fftw_real tmp84; + fftw_real tmp109; + fftw_real tmp81; + fftw_real tmp112; + ASSERT_ALIGNED_DOUBLE; + tmp65 = tmp61 - tmp64; + tmp110 = tmp105 - tmp104; + { + fftw_real tmp72; + fftw_real tmp79; + fftw_real tmp82; + fftw_real tmp83; + ASSERT_ALIGNED_DOUBLE; + tmp72 = (K173648177 * tmp68) - (K984807753 * tmp71); + tmp79 = (K342020143 * tmp75) + (K939692620 * tmp78); + tmp80 = tmp72 - tmp79; + tmp111 = K866025403 * (tmp72 + tmp79); + tmp82 = (K342020143 * tmp78) - (K939692620 * tmp75); + tmp83 = (K173648177 * tmp71) + (K984807753 * tmp68); + tmp84 = K866025403 * (tmp82 - tmp83); + tmp109 = tmp83 + tmp82; + } + c_re(inout[2 * iostride]) = 
tmp65 + tmp80; + tmp81 = tmp65 - (K500000000 * tmp80); + c_re(inout[8 * iostride]) = tmp81 - tmp84; + c_re(inout[5 * iostride]) = tmp81 + tmp84; + c_im(inout[2 * iostride]) = tmp109 + tmp110; + tmp112 = tmp110 - (K500000000 * tmp109); + c_im(inout[5 * iostride]) = tmp111 + tmp112; + c_im(inout[8 * iostride]) = tmp112 - tmp111; + } + { + fftw_real tmp85; + fftw_real tmp106; + fftw_real tmp92; + fftw_real tmp107; + fftw_real tmp96; + fftw_real tmp103; + fftw_real tmp93; + fftw_real tmp108; + ASSERT_ALIGNED_DOUBLE; + tmp85 = tmp61 + tmp64; + tmp106 = tmp104 + tmp105; + { + fftw_real tmp88; + fftw_real tmp91; + fftw_real tmp94; + fftw_real tmp95; + ASSERT_ALIGNED_DOUBLE; + tmp88 = (K766044443 * tmp86) - (K642787609 * tmp87); + tmp91 = (K173648177 * tmp89) - (K984807753 * tmp90); + tmp92 = tmp88 + tmp91; + tmp107 = K866025403 * (tmp88 - tmp91); + tmp94 = (K173648177 * tmp90) + (K984807753 * tmp89); + tmp95 = (K766044443 * tmp87) + (K642787609 * tmp86); + tmp96 = K866025403 * (tmp94 - tmp95); + tmp103 = tmp95 + tmp94; + } + c_re(inout[iostride]) = tmp85 + tmp92; + tmp93 = tmp85 - (K500000000 * tmp92); + c_re(inout[7 * iostride]) = tmp93 - tmp96; + c_re(inout[4 * iostride]) = tmp93 + tmp96; + c_im(inout[iostride]) = tmp103 + tmp106; + tmp108 = tmp106 - (K500000000 * tmp103); + c_im(inout[4 * iostride]) = tmp107 + tmp108; + c_im(inout[7 * iostride]) = tmp108 - tmp107; + } + } +} + +static const int twiddle_order[] = { 1, 2, 3, 4, 5, 6, 7, 8 }; +fftw_codelet_desc fftwi_twiddle_9_desc = { + "fftwi_twiddle_9", + (void (*)()) fftwi_twiddle_9, + 9, + FFTW_BACKWARD, + FFTW_TWIDDLE, + 209, + 8, + twiddle_order, +}; diff --git a/src/fftw/generic.c b/src/fftw/generic.c new file mode 100644 index 0000000..93ad3ec --- /dev/null +++ b/src/fftw/generic.c @@ -0,0 +1,102 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as 
published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* + * + * generic.c -- "generic" codelets. They work for all n (and they are + * slow) + */ +#include "fftw-int.h" +#include <stdlib.h> + +void fftw_twiddle_generic(fftw_complex *A, const fftw_complex *W, + int m, int r, int n, int stride) +{ + int i, j, k; + const fftw_complex *jp; + fftw_complex *kp; + fftw_complex *tmp = (fftw_complex *) + fftw_malloc(r * sizeof(fftw_complex)); + + for (i = 0; i < m; ++i) { + for (k = 0, kp = tmp; k < r; ++k, kp++) { + fftw_real r0, i0, rt, it, rw, iw; + int l1 = i + m * k; + int l0; + + r0 = i0 = 0.0; + for (j = 0, jp = A + i * stride, l0 = 0; j < r; ++j, + jp += m * stride) { + rw = c_re(W[l0]); + iw = c_im(W[l0]); + rt = c_re(*jp); + it = c_im(*jp); + r0 += rt * rw - it * iw; + i0 += rt * iw + it * rw; + l0 += l1; + if (l0 >= n) + l0 -= n; + } + c_re(*kp) = r0; + c_im(*kp) = i0; + } + for (k = 0, kp = A + i * stride; k < r; ++k, kp += m * stride) + *kp = tmp[k]; + } + + fftw_free(tmp); +} + +void fftwi_twiddle_generic(fftw_complex *A, const fftw_complex *W, + int m, int r, int n, int stride) +{ + int i, j, k; + const fftw_complex *jp; + fftw_complex *kp; + fftw_complex *tmp = (fftw_complex *) + fftw_malloc(r * sizeof(fftw_complex)); + + for (i = 0; i < m; ++i) { + for (k = 0, kp = tmp; k < r; ++k, kp++) { + fftw_real r0, i0, rt, it, rw, iw; + int l1 = i + m * k; + int l0; + + r0 = i0 = 0.0; + for (j = 0, jp = A + i * stride, l0 = 0; j 
< r; ++j, + jp += m * stride) { + rw = c_re(W[l0]); + iw = c_im(W[l0]); + rt = c_re(*jp); + it = c_im(*jp); + r0 += rt * rw + it * iw; + i0 += it * rw - rt * iw; + l0 += l1; + if (l0 >= n) + l0 -= n; + } + c_re(*kp) = r0; + c_im(*kp) = i0; + } + for (k = 0, kp = A + i * stride; k < r; ++k, kp += m * stride) + *kp = tmp[k]; + } + + fftw_free(tmp); +} diff --git a/src/fftw/malloc.c b/src/fftw/malloc.c new file mode 100644 index 0000000..7ae22e4 --- /dev/null +++ b/src/fftw/malloc.c @@ -0,0 +1,240 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* + * malloc.c -- memory allocation related functions + */ + +/* $Id: malloc.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ */ +#include "fftw-int.h" +#include <stdio.h> +#include <stdlib.h> + +#ifdef HAVE_MALLOC_H +#include <malloc.h> +#endif + +fftw_malloc_type_function fftw_malloc_hook = 0; +fftw_free_type_function fftw_free_hook = 0; +fftw_die_type_function fftw_die_hook = 0; + +/********************************************************** + * DEBUGGING CODE + **********************************************************/ +#ifdef FFTW_DEBUG +static int fftw_malloc_cnt = 0; + +/* + * debugging malloc/free. 
Initialize every malloced and freed area to + * random values, just to make sure we are not using uninitialized + * pointers. Also check for writes past the ends of allocated blocks, + * and a couple of other things. + * + * This code is a quick and dirty hack -- use at your own risk. + */ + +static int fftw_malloc_total = 0, fftw_malloc_max = 0, fftw_malloc_cnt_max = 0; + +#define MAGIC 0xABadCafe +#define PAD_FACTOR 2 +#define TWOINTS (2 * sizeof(int)) + +#define VERBOSE_ALLOCATION 0 + +#if VERBOSE_ALLOCATION +#define WHEN_VERBOSE(a) a +#else +#define WHEN_VERBOSE(a) +#endif + +void *fftw_malloc(size_t n) +{ + char *p; + int i; + + fftw_malloc_total += n; + + if (fftw_malloc_total > fftw_malloc_max) + fftw_malloc_max = fftw_malloc_total; + + p = (char *) malloc(PAD_FACTOR * n + TWOINTS); + if (!p) + fftw_die("fftw_malloc: out of memory\n"); + + /* store the size in a known position */ + ((int *) p)[0] = n; + ((int *) p)[1] = MAGIC; + for (i = 0; i < PAD_FACTOR * n; ++i) + p[i + TWOINTS] = (char) (i ^ 0xDEADBEEF); + + ++fftw_malloc_cnt; + + if (fftw_malloc_cnt > fftw_malloc_cnt_max) + fftw_malloc_cnt_max = fftw_malloc_cnt; + + /* skip the size we stored previously */ + return (void *) (p + TWOINTS); +} + +void fftw_free(void *p) +{ + char *q; + + if (!p) + return; + + q = ((char *) p) - TWOINTS; + if (!q) + fftw_die("fftw_free: tried to free NULL+TWOINTS pointer!\n"); + + { + int n = ((int *) q)[0]; + int magic = ((int *) q)[1]; + int i; + + WHEN_VERBOSE( { + printf("FFTW_FREE %d\n", n); + fflush(stdout); + }) + + *((int *) q) = 0; /* set to zero to detect duplicate free's */ + + if (magic != MAGIC) + fftw_die("Wrong magic in fftw_free()!\n"); + ((int *) q)[1] = ~MAGIC; + + if (n < 0) + fftw_die("Tried to free block with corrupt size descriptor!\n"); + + fftw_malloc_total -= n; + + if (fftw_malloc_total < 0) + fftw_die("fftw_malloc_total went negative!\n"); + + /* check for writing past end of array: */ + for (i = n; i < PAD_FACTOR * n; ++i) + if (q[i + TWOINTS] 
!= (char) (i ^ 0xDEADBEEF)) { + fflush(stdout); + fprintf(stderr, "Byte %d past end of array has changed!\n", + i - n + 1); + fftw_die("Array bounds overwritten!\n"); + } + for (i = 0; i < PAD_FACTOR * n; ++i) + q[i + TWOINTS] = (char) (i ^ 0xBEEFDEAD); + + --fftw_malloc_cnt; + + if (fftw_malloc_cnt < 0) + fftw_die("fftw_malloc_cnt went negative!\n"); + + if (fftw_malloc_cnt == 0 && fftw_malloc_total > 0 || + fftw_malloc_cnt > 0 && fftw_malloc_total == 0) + fftw_die("fftw_malloc_cnt/total not zero at the same time!\n"); + + free(q); + } +} + +#else +/********************************************************** + * NON DEBUGGING CODE + **********************************************************/ +/* production version, no hacks */ + +void *fftw_malloc(size_t n) +{ + void *p; + + if (fftw_malloc_hook) + return fftw_malloc_hook(n); + + if (n == 0) + n = 1; + + p = malloc(n); + + if (!p) + fftw_die("fftw_malloc: out of memory\n"); + + return p; +} + +void fftw_free(void *p) +{ + if (p) { + if (fftw_free_hook) { + fftw_free_hook(p); + return; + } + free(p); + } +} + +#endif + +/* die when fatal errors occur */ +void fftw_die(const char *s) +{ + if (fftw_die_hook) + fftw_die_hook(s); + + fflush(stdout); + fprintf(stderr, "fftw: %s", s); + exit(EXIT_FAILURE); +} + +/* check for memory leaks when debugging */ +void fftw_check_memory_leaks(void) +{ + extern int fftw_node_cnt, fftw_plan_cnt, fftw_twiddle_size; + +#ifdef FFTW_DEBUG + if (fftw_malloc_cnt || fftw_malloc_total || + fftw_node_cnt || fftw_plan_cnt || fftw_twiddle_size) { + fflush(stdout); + fprintf(stderr, + "MEMORY LEAK!!!\n" + "fftw_malloc = %d" + " node=%d plan=%d twiddle=%d\n" + "fftw_malloc_total = %d\n", + fftw_malloc_cnt, + fftw_node_cnt, fftw_plan_cnt, fftw_twiddle_size, + fftw_malloc_total); + exit(EXIT_FAILURE); + } +#else + if (fftw_node_cnt || fftw_plan_cnt || fftw_twiddle_size) { + fflush(stdout); + fprintf(stderr, + "MEMORY LEAK!!!\n" + " node=%d plan=%d twiddle=%d\n", + fftw_node_cnt, fftw_plan_cnt, 
fftw_twiddle_size); + exit(EXIT_FAILURE); + } +#endif +} + +void fftw_print_max_memory_usage(void) +{ +#ifdef FFTW_DEBUG + printf("\nMaximum number of blocks allocated = %d\n" + "Maximum number of bytes allocated = %0.3f kB\n", + fftw_malloc_cnt_max, fftw_malloc_max / 1024.0); +#endif +} diff --git a/src/fftw/planner.c b/src/fftw/planner.c new file mode 100644 index 0000000..30217d2 --- /dev/null +++ b/src/fftw/planner.c @@ -0,0 +1,475 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* + * planner.c -- find the optimal plan + */ + +/* $Id: planner.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ */ +#include "fftw-int.h" +#include <stdlib.h> +#include <stdio.h> + +extern fftw_generic_codelet fftw_twiddle_generic; +extern fftw_generic_codelet fftwi_twiddle_generic; +extern fftw_codelet_desc *fftw_config[]; + +fftw_plan_hook_ptr fftw_plan_hook = (fftw_plan_hook_ptr) NULL; + +static void init_test_array(fftw_complex *arr, int stride, int n) +{ + int j; + + for (j = 0; j < n; ++j) { + c_re(arr[stride * j]) = 0.0; + c_im(arr[stride * j]) = 0.0; + } +} + +/* + * The timer keeps doubling the number of iterations + * until the program runs for more than FFTW_TIME_MIN + */ +static double fftw_measure_runtime(fftw_plan plan, + fftw_complex *in, int istride, + fftw_complex *out, int ostride) +{ + fftw_time begin, end, start; + double t, tmax, tmin; + int i, iter; + int n; + int repeat; + int howmany = plan->vector_size; + + n = plan->n; + + iter = 1; + + for (;;) { + tmin = 1.0E10; + tmax = -1.0E10; + init_test_array(in, istride, n * howmany); + + start = fftw_get_time(); + /* repeat the measurement FFTW_TIME_REPEAT times */ + for (repeat = 0; repeat < FFTW_TIME_REPEAT; ++repeat) { + begin = fftw_get_time(); + for (i = 0; i < iter; ++i) { + fftw(plan, howmany, in, istride, istride, + out, ostride, ostride); + } + end = fftw_get_time(); + + t = fftw_time_to_sec(fftw_time_diff(end, begin)); + if (t < tmin) + tmin = t; + if (t > tmax) + tmax = t; + + /* do not run for too long */ + t = fftw_time_to_sec(fftw_time_diff(end, start)); + if (t > FFTW_TIME_LIMIT) + break; + } + + if (tmin >= FFTW_TIME_MIN) + break; + + iter *= 2; + } + + tmin /= (double) iter; + tmax /= (double) iter; + + return tmin; +} + +/* auxiliary functions */ +static void 
compute_cost(fftw_plan plan, + fftw_complex *in, int istride, + fftw_complex *out, int ostride) +{ + if (plan->flags & FFTW_MEASURE) + plan->cost = fftw_measure_runtime(plan, in, istride, out, ostride); + else { + double c; + c = plan->n * fftw_estimate_node(plan->root) * plan->vector_size; + plan->cost = c; + } +} + +static void run_plan_hooks(fftw_plan p) +{ + if (fftw_plan_hook && p) { + fftw_complete_twiddle(p->root, p->n); + fftw_plan_hook(p); + } +} + + +/* macrology */ +#define FOR_ALL_CODELETS(p) \ + fftw_codelet_desc **__q, *p; \ + for (__q = &fftw_config[0]; (p = (*__q)); ++__q) + +/****************************************** + * Recursive planner * + ******************************************/ +static fftw_plan planner(fftw_plan *table, int n, fftw_direction dir, + int flags, int vector_size, + fftw_complex *, int, fftw_complex *, int); + +/* + * the planner consists of two parts: one that tries to + * use accumulated wisdom, and one that does not. + * A small driver invokes both parts in sequence + */ + +/* planner with wisdom: look up the codelet suggested by the wisdom */ +static fftw_plan planner_wisdom(fftw_plan *table, int n, + fftw_direction dir, int flags, + int vector_size, + fftw_complex *in, int istride, + fftw_complex *out, int ostride) +{ + fftw_plan best = (fftw_plan) 0; + fftw_plan_node *node; + int have_wisdom; + enum fftw_node_type wisdom_type; + int wisdom_signature; + fftw_recurse_kind wisdom_recurse_kind; + + /* see if we remember any wisdom for this case */ + have_wisdom = fftw_wisdom_lookup(n, flags, dir, FFTW_WISDOM, + istride, ostride, + &wisdom_type, &wisdom_signature, + &wisdom_recurse_kind, 0); + + if (!have_wisdom) + return best; + + if (wisdom_type == FFTW_NOTW) { + FOR_ALL_CODELETS(p) { + if (p->dir == dir && p->type == wisdom_type) { + /* see if wisdom applies */ + if (wisdom_signature == p->signature && + p->size == n) { + node = fftw_make_node_notw(n, p); + best = fftw_make_plan(n, dir, node, flags, + p->type, 
p->signature, + FFTW_NORMAL_RECURSE, + vector_size); + fftw_use_plan(best); + run_plan_hooks(best); + return best; + } + } + } + } + if (wisdom_type == FFTW_TWIDDLE) { + FOR_ALL_CODELETS(p) { + if (p->dir == dir && p->type == wisdom_type) { + + /* see if wisdom applies */ + if (wisdom_signature == p->signature && + p->size > 1 && + (n % p->size) == 0) { + fftw_plan r = planner(table, n / p->size, dir, + flags | FFTW_NO_VECTOR_RECURSE, + wisdom_recurse_kind == + FFTW_VECTOR_RECURSE ? + p->size : vector_size, + in, istride, out, ostride); + node = fftw_make_node_twiddle(n, p, + r->root, flags); + best = fftw_make_plan(n, dir, node, flags, + p->type, p->signature, + wisdom_recurse_kind, + vector_size); + fftw_use_plan(best); + run_plan_hooks(best); + fftw_destroy_plan_internal(r); + return best; + } + } + } + } + /* + * BUG (or: TODO) Can we have generic wisdom? This is probably + * an academic question + */ + + return best; +} + +/* + * planner with no wisdom: try all combinations and pick + * the best + */ +static fftw_plan planner_normal(fftw_plan *table, int n, fftw_direction dir, + int flags, int vector_size, + fftw_complex *in, int istride, + fftw_complex *out, int ostride) +{ + fftw_plan best = (fftw_plan) 0; + fftw_plan newplan; + fftw_plan_node *node; + + /* see if we have any codelet that solves the problem */ + { + FOR_ALL_CODELETS(p) { + if (p->dir == dir && p->type == FFTW_NOTW) { + if (p->size == n) { + node = fftw_make_node_notw(n, p); + newplan = fftw_make_plan(n, dir, node, flags, + p->type, p->signature, + FFTW_NORMAL_RECURSE, + vector_size); + fftw_use_plan(newplan); + compute_cost(newplan, in, istride, out, ostride); + run_plan_hooks(newplan); + best = fftw_pick_better(newplan, best); + } + } + } + } + + /* Then, try all available twiddle codelets */ + { + FOR_ALL_CODELETS(p) { + if (p->dir == dir && p->type == FFTW_TWIDDLE) { + if ((n % p->size) == 0 && + p->size > 1 && + (!best || n != p->size)) { + fftw_plan r = planner(table, n / p->size, dir, 
+ flags | FFTW_NO_VECTOR_RECURSE, + vector_size, + in, istride, out, ostride); + node = fftw_make_node_twiddle(n, p, + r->root, flags); + newplan = fftw_make_plan(n, dir, node, flags, + p->type, p->signature, + FFTW_NORMAL_RECURSE, + vector_size); + fftw_use_plan(newplan); + fftw_destroy_plan_internal(r); + compute_cost(newplan, in, istride, out, ostride); + run_plan_hooks(newplan); + best = fftw_pick_better(newplan, best); + } + } + } + } + + /* try vector recursion unless prohibited by the flags: */ + if (! (flags & FFTW_NO_VECTOR_RECURSE)) { + FOR_ALL_CODELETS(p) { + if (p->dir == dir && p->type == FFTW_TWIDDLE) { + if ((n % p->size) == 0 && + p->size > 1 && + (!best || n != p->size)) { + fftw_plan r = planner(table, n / p->size, dir, + flags | FFTW_NO_VECTOR_RECURSE, + p->size, + in, istride, out, ostride); + node = fftw_make_node_twiddle(n, p, + r->root, flags); + newplan = fftw_make_plan(n, dir, node, flags, + p->type, p->signature, + FFTW_VECTOR_RECURSE, + vector_size); + fftw_use_plan(newplan); + fftw_destroy_plan_internal(r); + compute_cost(newplan, in, istride, out, ostride); + run_plan_hooks(newplan); + best = fftw_pick_better(newplan, best); + } + } + } + } + + /* + * resort to generic or rader codelets for unknown factors + */ + { + fftw_generic_codelet *codelet = (dir == FFTW_FORWARD ? + fftw_twiddle_generic : + fftwi_twiddle_generic); + int size, prev_size = 0, remaining_factors = n; + fftw_plan r; + + while (remaining_factors > 1) { + size = fftw_factor(remaining_factors); + remaining_factors /= size; + + /* don't try the same factor more than once */ + if (size == prev_size) + continue; + prev_size = size; + + /* Look for codelets corresponding to this factor. 
*/ + { + FOR_ALL_CODELETS(p) { + if (p->dir == dir && p->type == FFTW_TWIDDLE + && p->size == size) { + size = 0; + break; + } + } + } + + /* + * only try a generic/rader codelet if there were no + * twiddle codelets for this factor + */ + if (!size) + continue; + + r = planner(table, n / size, dir, + flags | FFTW_NO_VECTOR_RECURSE, + vector_size, + in, istride, out, ostride); + + /* Try Rader codelet: */ + node = fftw_make_node_rader(n, size, dir, r->root, flags); + newplan = fftw_make_plan(n, dir, node, flags, FFTW_RADER, 0, + FFTW_NORMAL_RECURSE, vector_size); + fftw_use_plan(newplan); + compute_cost(newplan, in, istride, out, ostride); + run_plan_hooks(newplan); + best = fftw_pick_better(newplan, best); + + if (size < 100) { /* + * only try generic for small + * sizes + */ + /* Try generic codelet: */ + node = fftw_make_node_generic(n, size, codelet, + r->root, flags); + newplan = fftw_make_plan(n, dir, node, flags, + FFTW_GENERIC, 0, + FFTW_NORMAL_RECURSE, vector_size); + fftw_use_plan(newplan); + compute_cost(newplan, in, istride, out, ostride); + run_plan_hooks(newplan); + best = fftw_pick_better(newplan, best); + } + fftw_destroy_plan_internal(r); + } + } + + if (!best) + fftw_die("bug in planner\n"); + + return best; +} + +static fftw_plan planner(fftw_plan *table, int n, fftw_direction dir, + int flags, int vector_size, + fftw_complex *in, int istride, + fftw_complex *out, int ostride) +{ + fftw_plan best = (fftw_plan) 0; + + if (vector_size > 1) + flags |= FFTW_NO_VECTOR_RECURSE; + + /* see if plan has already been computed */ + best = fftw_lookup(table, n, flags, vector_size); + if (best) { + fftw_use_plan(best); + return best; + } + /* try a wise plan */ + best = planner_wisdom(table, n, dir, flags, vector_size, + in, istride, out, ostride); + + if (!best) { + /* No wisdom. Plan normally. 
*/ + best = planner_normal(table, n, dir, flags, + vector_size, + in, istride, out, ostride); + } + if (best) { + fftw_insert(table, best); + + /* remember the wisdom */ + fftw_wisdom_add(n, flags, dir, FFTW_WISDOM, istride, ostride, + best->wisdom_type, + best->wisdom_signature, + best->recurse_kind); + } + return best; +} + +fftw_plan fftw_create_plan_specific(int n, fftw_direction dir, int flags, + fftw_complex *in, int istride, + fftw_complex *out, int ostride) +{ + fftw_plan table; + fftw_plan p1; + + /* validate parameters */ + if (n <= 0) + return (fftw_plan) 0; + +#ifndef FFTW_ENABLE_VECTOR_RECURSE + /* TEMPORARY: disable vector recursion until it is more tested. */ + flags |= FFTW_NO_VECTOR_RECURSE; +#endif + + if ((dir != FFTW_FORWARD) && (dir != FFTW_BACKWARD)) + return (fftw_plan) 0; + + fftw_make_empty_table(&table); + p1 = planner(&table, n, dir, flags, 1, + in, istride, out, ostride); + fftw_destroy_table(&table); + + if (p1) + fftw_complete_twiddle(p1->root, n); + return p1; +} + +fftw_plan fftw_create_plan(int n, fftw_direction dir, int flags) +{ + fftw_complex *tmp_in, *tmp_out; + fftw_plan p; + + if (flags & FFTW_MEASURE) { + tmp_in = (fftw_complex *) fftw_malloc(2 * n * sizeof(fftw_complex)); + if (!tmp_in) + return 0; + tmp_out = tmp_in + n; + + p = fftw_create_plan_specific(n, dir, flags, + tmp_in, 1, tmp_out, 1); + + fftw_free(tmp_in); + } else + p = fftw_create_plan_specific(n, dir, flags, + (fftw_complex *) 0, 1, (fftw_complex *) 0, 1); + + return p; +} + +void fftw_destroy_plan(fftw_plan plan) +{ + fftw_destroy_plan_internal(plan); +} diff --git a/src/fftw/putils.c b/src/fftw/putils.c new file mode 100644 index 0000000..7cbe87d --- /dev/null +++ b/src/fftw/putils.c @@ -0,0 +1,555 @@ + +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; 
either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* + * putils.c -- plan utilities shared by planner.c and rplanner.c + */ + +/* $Id: putils.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ */ +#include "fftw-int.h" +#include <stdlib.h> +#include <stdio.h> + +int fftw_node_cnt = 0; +int fftw_plan_cnt = 0; + +/* + * These two constants are used for the FFTW_ESTIMATE flag to help + * create a heuristic plan. They don't affect FFTW_MEASURE. + */ +#define NOTW_OPTIMAL_SIZE 32 +#define TWIDDLE_OPTIMAL_SIZE 12 + +#define IS_POWER_OF_TWO(n) (((n) & ((n) - 1)) == 0) + +/* constructors --- I wish I had ML */ +fftw_plan_node *fftw_make_node(void) +{ + fftw_plan_node *p = (fftw_plan_node *) + fftw_malloc(sizeof(fftw_plan_node)); + p->refcnt = 0; + fftw_node_cnt++; + return p; +} + +void fftw_use_node(fftw_plan_node *p) +{ + ++p->refcnt; +} + +fftw_plan_node *fftw_make_node_notw(int size, const fftw_codelet_desc *config) +{ + fftw_plan_node *p = fftw_make_node(); + + p->type = config->type; + p->nodeu.notw.size = size; + p->nodeu.notw.codelet = (fftw_notw_codelet *) config->codelet; + p->nodeu.notw.codelet_desc = config; + return p; +} + +fftw_plan_node *fftw_make_node_real2hc(int size, + const fftw_codelet_desc *config) +{ + fftw_plan_node *p = fftw_make_node(); + + p->type = config->type; + p->nodeu.real2hc.size = size; + p->nodeu.real2hc.codelet = (fftw_real2hc_codelet *) config->codelet; + p->nodeu.real2hc.codelet_desc = config; + return p; +} + +fftw_plan_node 
*fftw_make_node_hc2real(int size, + const fftw_codelet_desc *config) +{ + fftw_plan_node *p = fftw_make_node(); + + p->type = config->type; + p->nodeu.hc2real.size = size; + p->nodeu.hc2real.codelet = (fftw_hc2real_codelet *) config->codelet; + p->nodeu.hc2real.codelet_desc = config; + return p; +} + +fftw_plan_node *fftw_make_node_twiddle(int n, + const fftw_codelet_desc *config, + fftw_plan_node *recurse, + int flags) +{ + fftw_plan_node *p = fftw_make_node(); + + p->type = config->type; + p->nodeu.twiddle.size = config->size; + p->nodeu.twiddle.codelet = (fftw_twiddle_codelet *) config->codelet; + p->nodeu.twiddle.recurse = recurse; + p->nodeu.twiddle.codelet_desc = config; + fftw_use_node(recurse); + if (flags & FFTW_MEASURE) + p->nodeu.twiddle.tw = fftw_create_twiddle(n, config); + else + p->nodeu.twiddle.tw = 0; + return p; +} + +fftw_plan_node *fftw_make_node_hc2hc(int n, fftw_direction dir, + const fftw_codelet_desc *config, + fftw_plan_node *recurse, + int flags) +{ + fftw_plan_node *p = fftw_make_node(); + + p->type = config->type; + p->nodeu.hc2hc.size = config->size; + p->nodeu.hc2hc.dir = dir; + p->nodeu.hc2hc.codelet = (fftw_hc2hc_codelet *) config->codelet; + p->nodeu.hc2hc.recurse = recurse; + p->nodeu.hc2hc.codelet_desc = config; + fftw_use_node(recurse); + if (flags & FFTW_MEASURE) + p->nodeu.hc2hc.tw = fftw_create_twiddle(n, config); + else + p->nodeu.hc2hc.tw = 0; + return p; +} + +fftw_plan_node *fftw_make_node_generic(int n, int size, + fftw_generic_codelet *codelet, + fftw_plan_node *recurse, + int flags) +{ + fftw_plan_node *p = fftw_make_node(); + + p->type = FFTW_GENERIC; + p->nodeu.generic.size = size; + p->nodeu.generic.codelet = codelet; + p->nodeu.generic.recurse = recurse; + fftw_use_node(recurse); + + if (flags & FFTW_MEASURE) + p->nodeu.generic.tw = fftw_create_twiddle(n, + (const fftw_codelet_desc *) 0); + else + p->nodeu.generic.tw = 0; + return p; +} + +fftw_plan_node *fftw_make_node_rgeneric(int n, int size, + fftw_direction 
dir, + fftw_rgeneric_codelet *codelet, + fftw_plan_node *recurse, + int flags) +{ + fftw_plan_node *p = fftw_make_node(); + + if (size % 2 == 0 || (n / size) % 2 == 0) + fftw_die("invalid size for rgeneric codelet\n"); + + p->type = FFTW_RGENERIC; + p->nodeu.rgeneric.size = size; + p->nodeu.rgeneric.dir = dir; + p->nodeu.rgeneric.codelet = codelet; + p->nodeu.rgeneric.recurse = recurse; + fftw_use_node(recurse); + + if (flags & FFTW_MEASURE) + p->nodeu.rgeneric.tw = fftw_create_twiddle(n, + (const fftw_codelet_desc *) 0); + else + p->nodeu.rgeneric.tw = 0; + return p; +} + +/* + * Note that these two Rader-related things must go here, rather than + * in rader.c, in order that putils.c (and rplanner.c) won't depend + * upon rader.c. + */ + +fftw_rader_data *fftw_rader_top = NULL; + +static void fftw_destroy_rader(fftw_rader_data * d) +{ + if (d) { + d->refcount--; + if (d->refcount <= 0) { + fftw_rader_data *cur = fftw_rader_top, *prev = NULL; + + while (cur && cur != d) { + prev = cur; + cur = cur->next; + } + if (!cur) + fftw_die("invalid Rader data pointer\n"); + + if (prev) + prev->next = d->next; + else + fftw_rader_top = d->next; + + fftw_destroy_plan_internal(d->plan); + fftw_free(d->omega); + fftw_free(d->cdesc); + fftw_free(d); + } + } +} + +static void destroy_tree(fftw_plan_node *p) +{ + if (p) { + --p->refcnt; + if (p->refcnt == 0) { + switch (p->type) { + case FFTW_NOTW: + case FFTW_REAL2HC: + case FFTW_HC2REAL: + break; + + case FFTW_TWIDDLE: + if (p->nodeu.twiddle.tw) + fftw_destroy_twiddle(p->nodeu.twiddle.tw); + destroy_tree(p->nodeu.twiddle.recurse); + break; + + case FFTW_HC2HC: + if (p->nodeu.hc2hc.tw) + fftw_destroy_twiddle(p->nodeu.hc2hc.tw); + destroy_tree(p->nodeu.hc2hc.recurse); + break; + + case FFTW_GENERIC: + if (p->nodeu.generic.tw) + fftw_destroy_twiddle(p->nodeu.generic.tw); + destroy_tree(p->nodeu.generic.recurse); + break; + + case FFTW_RADER: + if (p->nodeu.rader.tw) + fftw_destroy_twiddle(p->nodeu.rader.tw); + if 
(p->nodeu.rader.rader_data) + fftw_destroy_rader(p->nodeu.rader.rader_data); + destroy_tree(p->nodeu.rader.recurse); + break; + + case FFTW_RGENERIC: + if (p->nodeu.rgeneric.tw) + fftw_destroy_twiddle(p->nodeu.rgeneric.tw); + destroy_tree(p->nodeu.rgeneric.recurse); + break; + } + + fftw_free(p); + fftw_node_cnt--; + } + } +} + +/* create a plan with twiddle factors, and other bells and whistles */ +fftw_plan fftw_make_plan(int n, fftw_direction dir, + fftw_plan_node *root, int flags, + enum fftw_node_type wisdom_type, + int wisdom_signature, + fftw_recurse_kind recurse_kind, int vector_size) +{ + fftw_plan p = (fftw_plan) fftw_malloc(sizeof(struct fftw_plan_struct)); + + p->n = n; + p->dir = dir; + p->flags = flags; + fftw_use_node(root); + p->root = root; + p->cost = 0.0; + p->wisdom_type = wisdom_type; + p->wisdom_signature = wisdom_signature; + p->recurse_kind = recurse_kind; + p->vector_size = vector_size; + if (recurse_kind == FFTW_VECTOR_RECURSE && vector_size > 1) + fftw_die("invalid vector-recurse plan attempted\n"); + p->next = (fftw_plan) 0; + p->refcnt = 0; + fftw_plan_cnt++; + return p; +} + +/* + * complete with twiddle factors (because nodes don't have + * them when FFTW_ESTIMATE is set) + */ +void fftw_complete_twiddle(fftw_plan_node *p, int n) +{ + int r; + switch (p->type) { + case FFTW_NOTW: + case FFTW_REAL2HC: + case FFTW_HC2REAL: + break; + + case FFTW_TWIDDLE: + r = p->nodeu.twiddle.size; + if (!p->nodeu.twiddle.tw) + p->nodeu.twiddle.tw = + fftw_create_twiddle(n, p->nodeu.twiddle.codelet_desc); + fftw_complete_twiddle(p->nodeu.twiddle.recurse, n / r); + break; + + case FFTW_HC2HC: + r = p->nodeu.hc2hc.size; + if (!p->nodeu.hc2hc.tw) + p->nodeu.hc2hc.tw = + fftw_create_twiddle(n, p->nodeu.hc2hc.codelet_desc); + fftw_complete_twiddle(p->nodeu.hc2hc.recurse, n / r); + break; + + case FFTW_GENERIC: + r = p->nodeu.generic.size; + if (!p->nodeu.generic.tw) + p->nodeu.generic.tw = + fftw_create_twiddle(n, (const fftw_codelet_desc *) 0); + 
fftw_complete_twiddle(p->nodeu.generic.recurse, n / r); + break; + + case FFTW_RADER: + r = p->nodeu.rader.size; + if (!p->nodeu.rader.tw) + p->nodeu.rader.tw = + fftw_create_twiddle(n, p->nodeu.rader.rader_data->cdesc); + fftw_complete_twiddle(p->nodeu.rader.recurse, n / r); + break; + + case FFTW_RGENERIC: + r = p->nodeu.rgeneric.size; + if (!p->nodeu.rgeneric.tw) + p->nodeu.rgeneric.tw = + fftw_create_twiddle(n, (const fftw_codelet_desc *) 0); + fftw_complete_twiddle(p->nodeu.rgeneric.recurse, n / r); + break; + + } +} + +void fftw_use_plan(fftw_plan p) +{ + ++p->refcnt; +} + +void fftw_destroy_plan_internal(fftw_plan p) +{ + --p->refcnt; + + if (p->refcnt == 0) { + destroy_tree(p->root); + fftw_plan_cnt--; + fftw_free(p); + } +} + +/* end of constructors */ + +/* management of plan tables */ +void fftw_make_empty_table(fftw_plan *table) +{ + *table = (fftw_plan) 0; +} + +void fftw_insert(fftw_plan *table, fftw_plan this_plan) +{ + fftw_use_plan(this_plan); + this_plan->next = *table; + *table = this_plan; +} + +fftw_plan fftw_lookup(fftw_plan *table, int n, int flags, int vector_size) +{ + fftw_plan p; + + for (p = *table; p && + (p->n != n || p->flags != flags || p->vector_size != vector_size); + p = p->next); + + return p; +} + +void fftw_destroy_table(fftw_plan *table) +{ + fftw_plan p, q; + + for (p = *table; p; p = q) { + q = p->next; + fftw_destroy_plan_internal(p); + } +} + +double fftw_estimate_node(fftw_plan_node *p) +{ + int k; + + switch (p->type) { + case FFTW_NOTW: + k = p->nodeu.notw.size; + goto common1; + + case FFTW_REAL2HC: + k = p->nodeu.real2hc.size; + goto common1; + + case FFTW_HC2REAL: + k = p->nodeu.hc2real.size; + common1: + return 1.0 + 0.1 * (k - NOTW_OPTIMAL_SIZE) * + (k - NOTW_OPTIMAL_SIZE); + + case FFTW_TWIDDLE: + k = p->nodeu.twiddle.size; + return 1.0 + 0.1 * (k - TWIDDLE_OPTIMAL_SIZE) * + (k - TWIDDLE_OPTIMAL_SIZE) + + fftw_estimate_node(p->nodeu.twiddle.recurse); + + case FFTW_HC2HC: + k = p->nodeu.hc2hc.size; + return 1.0 + 
0.1 * (k - TWIDDLE_OPTIMAL_SIZE) * + (k - TWIDDLE_OPTIMAL_SIZE) + + fftw_estimate_node(p->nodeu.hc2hc.recurse); + + case FFTW_GENERIC: + k = p->nodeu.generic.size; + return 10.0 + k * k + + fftw_estimate_node(p->nodeu.generic.recurse); + + case FFTW_RADER: + k = p->nodeu.rader.size; + return 10.0 + 10 * k + + fftw_estimate_node(p->nodeu.rader.recurse); + + case FFTW_RGENERIC: + k = p->nodeu.rgeneric.size; + return 10.0 + k * k + + fftw_estimate_node(p->nodeu.rgeneric.recurse); + } + return 1.0E20; +} + +/* pick the better of two plans and destroy the other one. */ +fftw_plan fftw_pick_better(fftw_plan p1, fftw_plan p2) +{ + if (!p1) + return p2; + + if (!p2) + return p1; + + if (p1->cost > p2->cost) { + fftw_destroy_plan_internal(p1); + return p2; + } else { + fftw_destroy_plan_internal(p2); + return p1; + } +} + +/* find the smallest prime factor of n */ +int fftw_factor(int n) +{ + int r; + + /* try 2 */ + if ((n & 1) == 0) + return 2; + + /* try odd numbers up to sqrt(n) */ + for (r = 3; r * r <= n; r += 2) + if (n % r == 0) + return r; + + /* n is prime */ + return n; +} + +static void print_node(FILE *f, fftw_plan_node *p, int indent) +{ + if (p) { + switch (p->type) { + case FFTW_NOTW: + fprintf(f, "%*sFFTW_NOTW %d\n", indent, "", + p->nodeu.notw.size); + break; + case FFTW_REAL2HC: + fprintf(f, "%*sFFTW_REAL2HC %d\n", indent, "", + p->nodeu.real2hc.size); + break; + case FFTW_HC2REAL: + fprintf(f, "%*sFFTW_HC2REAL %d\n", indent, "", + p->nodeu.hc2real.size); + break; + case FFTW_TWIDDLE: + fprintf(f, "%*sFFTW_TWIDDLE %d\n", indent, "", + p->nodeu.twiddle.size); + print_node(f, p->nodeu.twiddle.recurse, indent); + break; + case FFTW_HC2HC: + fprintf(f, "%*sFFTW_HC2HC %d\n", indent, "", + p->nodeu.hc2hc.size); + print_node(f, p->nodeu.hc2hc.recurse, indent); + break; + case FFTW_GENERIC: + fprintf(f, "%*sFFTW_GENERIC %d\n", indent, "", + p->nodeu.generic.size); + print_node(f, p->nodeu.generic.recurse, indent); + break; + case FFTW_RADER: + fprintf(f, 
"%*sFFTW_RADER %d\n", indent, "", + p->nodeu.rader.size); + + fprintf(f, "%*splan for size %d convolution:\n", + indent + 4, "", p->nodeu.rader.size - 1); + print_node(f, p->nodeu.rader.rader_data->plan->root, + indent + 6); + + print_node(f, p->nodeu.rader.recurse, indent); + break; + case FFTW_RGENERIC: + fprintf(f, "%*sFFTW_RGENERIC %d\n", indent, "", + p->nodeu.rgeneric.size); + print_node(f, p->nodeu.rgeneric.recurse, indent); + break; + } + } +} + +void fftw_fprint_plan(FILE *f, fftw_plan p) +{ + + fprintf(f, "plan: (cost = %e)\n", p->cost); + if (p->recurse_kind == FFTW_VECTOR_RECURSE) + fprintf(f, "(vector recursion)\n"); + else if (p->vector_size > 1) + fprintf(f, "(vector-size %d)\n", p->vector_size); + print_node(f, p->root, 0); +} + +void fftw_print_plan(fftw_plan p) +{ + fftw_fprint_plan(stdout, p); +} + +size_t fftw_sizeof_fftw_real(void) +{ + return(sizeof(fftw_real)); +} diff --git a/src/fftw/rader.c b/src/fftw/rader.c new file mode 100644 index 0000000..156529b --- /dev/null +++ b/src/fftw/rader.c @@ -0,0 +1,365 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* + * Compute transforms of prime sizes using Rader's trick: turn them + * into convolutions of size n - 1, which you then perform via a pair + * of FFTs. + */ + +#include <stdlib.h> +#include <math.h> + +#include "fftw-int.h" + +#ifdef FFTW_DEBUG +#define WHEN_DEBUG(a) a +#else +#define WHEN_DEBUG(a) +#endif + +/* compute n^m mod p, where m >= 0 and p > 0. */ +static int power_mod(int n, int m, int p) +{ + if (m == 0) + return 1; + else if (m % 2 == 0) { + int x = power_mod(n, m / 2, p); + return MULMOD(x, x, p); + } + else + return MULMOD(n, power_mod(n, m - 1, p), p); +} + +/* + * Find the period of n in the multiplicative group mod p (p prime). + * That is, return the smallest m such that n^m == 1 mod p. + */ +static int period(int n, int p) +{ + int prod = n, period = 1; + + while (prod != 1) { + prod = MULMOD(prod, n, p); + ++period; + if (prod == 0) + fftw_die("non-prime order in Rader\n"); + } + return period; +} + +/* find a generator for the multiplicative group mod p, where p is prime */ +static int find_generator(int p) +{ + int g; + + for (g = 1; g < p; ++g) + if (period(g, p) == p - 1) + break; + if (g == p) + fftw_die("couldn't find generator for Rader\n"); + return g; +} + +/***************************************************************************/ + +static fftw_rader_data *create_rader_aux(int p, int flags) +{ + fftw_complex *omega, *work; + int g, ginv, gpower; + int i; + FFTW_TRIG_REAL twoPiOverN; + fftw_real scale = 1.0 / (p - 1); /* for convolution */ + fftw_plan plan; + fftw_rader_data *d; + + if (p < 2) + fftw_die("non-prime order in Rader\n"); + + flags &= ~FFTW_IN_PLACE; + + d = (fftw_rader_data *) fftw_malloc(sizeof(fftw_rader_data)); + + g = find_generator(p); + ginv = power_mod(g, p - 2, p); + + omega = 
(fftw_complex *) fftw_malloc((p - 1) * sizeof(fftw_complex)); + + plan = fftw_create_plan(p - 1, FFTW_FORWARD, + flags & ~FFTW_NO_VECTOR_RECURSE); + + work = (fftw_complex *) fftw_malloc((p - 1) * sizeof(fftw_complex)); + + twoPiOverN = FFTW_K2PI / (FFTW_TRIG_REAL) p; + gpower = 1; + for (i = 0; i < p - 1; ++i) { + c_re(work[i]) = scale * FFTW_TRIG_COS(twoPiOverN * gpower); + c_im(work[i]) = FFTW_FORWARD * scale * FFTW_TRIG_SIN(twoPiOverN + * gpower); + gpower = MULMOD(gpower, ginv, p); + } + + /* fft permuted roots of unity */ + fftw_executor_simple(p - 1, work, omega, plan->root, 1, 1, + plan->recurse_kind); + + fftw_free(work); + + d->plan = plan; + d->omega = omega; + d->g = g; + d->ginv = ginv; + d->p = p; + d->flags = flags; + d->refcount = 1; + d->next = NULL; + + d->cdesc = (fftw_codelet_desc *) fftw_malloc(sizeof(fftw_codelet_desc)); + d->cdesc->name = NULL; + d->cdesc->codelet = NULL; + d->cdesc->size = p; + d->cdesc->dir = FFTW_FORWARD; + d->cdesc->type = FFTW_RADER; + d->cdesc->signature = g; + d->cdesc->ntwiddle = 0; + d->cdesc->twiddle_order = NULL; + return d; +} + +/***************************************************************************/ + +static fftw_rader_data *fftw_create_rader(int p, int flags) +{ + fftw_rader_data *d = fftw_rader_top; + + flags &= ~FFTW_IN_PLACE; + while (d && (d->p != p || d->flags != flags)) + d = d->next; + if (d) { + d->refcount++; + return d; + } + d = create_rader_aux(p, flags); + d->next = fftw_rader_top; + fftw_rader_top = d; + return d; +} + +/***************************************************************************/ + +/* Compute the prime FFTs, premultiplied by twiddle factors. Below, we + * extensively use the identity that fft(x*)* = ifft(x) in order to + * share data between forward and backward transforms and to obviate + * the necessity of having separate forward and backward plans. 
*/ + +void fftw_twiddle_rader(fftw_complex *A, const fftw_complex *W, + int m, int r, int stride, + fftw_rader_data * d) +{ + fftw_complex *tmp = (fftw_complex *) + fftw_malloc((r - 1) * sizeof(fftw_complex)); + int i, k, gpower = 1, g = d->g, ginv = d->ginv; + fftw_real a0r, a0i; + fftw_complex *omega = d->omega; + + for (i = 0; i < m; ++i, A += stride, W += r - 1) { + /* + * Here, we fft W[k-1] * A[k*(m*stride)], using Rader. + * (Actually, W is pre-permuted to match the permutation that we + * will do on A.) + */ + + /* First, permute the input and multiply by W, storing in tmp: */ + /* gpower == g^k mod r in the following loop */ + for (k = 0; k < r - 1; ++k, gpower = MULMOD(gpower, g, r)) { + fftw_real rA, iA, rW, iW; + rW = c_re(W[k]); + iW = c_im(W[k]); + rA = c_re(A[gpower * (m * stride)]); + iA = c_im(A[gpower * (m * stride)]); + c_re(tmp[k]) = rW * rA - iW * iA; + c_im(tmp[k]) = rW * iA + iW * rA; + } + + WHEN_DEBUG( { + if (gpower != 1) + fftw_die("incorrect generator in Rader\n"); + } + ); + + /* FFT tmp to A: */ + fftw_executor_simple(r - 1, tmp, A + (m * stride), + d->plan->root, 1, m * stride, + d->plan->recurse_kind); + + /* set output DC component: */ + a0r = c_re(A[0]); + a0i = c_im(A[0]); + c_re(A[0]) += c_re(A[(m * stride)]); + c_im(A[0]) += c_im(A[(m * stride)]); + + /* now, multiply by omega: */ + for (k = 0; k < r - 1; ++k) { + fftw_real rA, iA, rW, iW; + rW = c_re(omega[k]); + iW = c_im(omega[k]); + rA = c_re(A[(k + 1) * (m * stride)]); + iA = c_im(A[(k + 1) * (m * stride)]); + c_re(A[(k + 1) * (m * stride)]) = rW * rA - iW * iA; + c_im(A[(k + 1) * (m * stride)]) = -(rW * iA + iW * rA); + } + + /* this will add A[0] to all of the outputs after the ifft */ + c_re(A[(m * stride)]) += a0r; + c_im(A[(m * stride)]) -= a0i; + + /* inverse FFT: */ + fftw_executor_simple(r - 1, A + (m * stride), tmp, + d->plan->root, m * stride, 1, + d->plan->recurse_kind); + + /* finally, do inverse permutation to unshuffle the output: */ + for (k = 0; k < r - 1; 
++k, gpower = MULMOD(gpower, ginv, r)) { + c_re(A[gpower * (m * stride)]) = c_re(tmp[k]); + c_im(A[gpower * (m * stride)]) = -c_im(tmp[k]); + } + + WHEN_DEBUG( { + if (gpower != 1) + fftw_die("incorrect generator in Rader\n"); + } + ); + + } + + fftw_free(tmp); +} + +void fftwi_twiddle_rader(fftw_complex *A, const fftw_complex *W, + int m, int r, int stride, + fftw_rader_data * d) +{ + fftw_complex *tmp = (fftw_complex *) + fftw_malloc((r - 1) * sizeof(fftw_complex)); + int i, k, gpower = 1, g = d->g, ginv = d->ginv; + fftw_real a0r, a0i; + fftw_complex *omega = d->omega; + + for (i = 0; i < m; ++i, A += stride, W += r - 1) { + /* + * Here, we fft W[k-1]* * A[k*(m*stride)], using Rader. + * (Actually, W is pre-permuted to match the permutation that + * we will do on A.) + */ + + /* First, permute the input and multiply by W*, storing in tmp: */ + /* gpower == g^k mod r in the following loop */ + for (k = 0; k < r - 1; ++k, gpower = MULMOD(gpower, g, r)) { + fftw_real rA, iA, rW, iW; + rW = c_re(W[k]); + iW = c_im(W[k]); + rA = c_re(A[gpower * (m * stride)]); + iA = c_im(A[gpower * (m * stride)]); + c_re(tmp[k]) = rW * rA + iW * iA; + c_im(tmp[k]) = iW * rA - rW * iA; + } + + WHEN_DEBUG( { + if (gpower != 1) + fftw_die("incorrect generator in Rader\n"); + } + ); + + /* FFT tmp to A: */ + fftw_executor_simple(r - 1, tmp, A + (m * stride), + d->plan->root, 1, m * stride, + d->plan->recurse_kind); + + /* set output DC component: */ + a0r = c_re(A[0]); + a0i = c_im(A[0]); + c_re(A[0]) += c_re(A[(m * stride)]); + c_im(A[0]) -= c_im(A[(m * stride)]); + + /* now, multiply by omega: */ + for (k = 0; k < r - 1; ++k) { + fftw_real rA, iA, rW, iW; + rW = c_re(omega[k]); + iW = c_im(omega[k]); + rA = c_re(A[(k + 1) * (m * stride)]); + iA = c_im(A[(k + 1) * (m * stride)]); + c_re(A[(k + 1) * (m * stride)]) = rW * rA - iW * iA; + c_im(A[(k + 1) * (m * stride)]) = -(rW * iA + iW * rA); + } + + /* this will add A[0] to all of the outputs after the ifft */ + c_re(A[(m * stride)]) += 
a0r; + c_im(A[(m * stride)]) += a0i; + + /* inverse FFT: */ + fftw_executor_simple(r - 1, A + (m * stride), tmp, + d->plan->root, m * stride, 1, + d->plan->recurse_kind); + + /* finally, do inverse permutation to unshuffle the output: */ + for (k = 0; k < r - 1; ++k, gpower = MULMOD(gpower, ginv, r)) { + A[gpower * (m * stride)] = tmp[k]; + } + + WHEN_DEBUG( { + if (gpower != 1) + fftw_die("incorrect generator in Rader\n"); + } + ); + } + + fftw_free(tmp); +} + +/***************************************************************************/ + +/* + * Make an FFTW_RADER plan node. Note that this function must go + * here, rather than in putils.c, because it indirectly calls the + * fftw_planner. If we included it in putils.c, which is also used + * by rfftw, then any program using rfftw would be linked with all + * of the FFTW codelets, even if they were not needed. I wish that the + * darn linkers operated on a function rather than a file granularity. + */ +fftw_plan_node *fftw_make_node_rader(int n, int size, fftw_direction dir, + fftw_plan_node *recurse, + int flags) +{ + fftw_plan_node *p = fftw_make_node(); + + p->type = FFTW_RADER; + p->nodeu.rader.size = size; + p->nodeu.rader.codelet = dir == FFTW_FORWARD ? 
+ fftw_twiddle_rader : fftwi_twiddle_rader; + p->nodeu.rader.rader_data = fftw_create_rader(size, flags); + p->nodeu.rader.recurse = recurse; + fftw_use_node(recurse); + + if (flags & FFTW_MEASURE) + p->nodeu.rader.tw = + fftw_create_twiddle(n, p->nodeu.rader.rader_data->cdesc); + else + p->nodeu.rader.tw = 0; + return p; +} diff --git a/src/fftw/timer.c b/src/fftw/timer.c new file mode 100644 index 0000000..a7d05a4 --- /dev/null +++ b/src/fftw/timer.c @@ -0,0 +1,164 @@ + +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* + * timer.c -- this file measures the execution time of + * ffts. This information is used by the planner. 
+ */
+
+/* $Id: timer.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ */
+
+#include <time.h>
+#include "fftw-int.h"
+#include <math.h>
+#include <stdlib.h>
+
+/********************* System-specific Timing Support *********************/
+
+#if defined(HAVE_MAC_TIMER) && !defined(HAVE_MAC_PCI_TIMER)
+
+/* Use Macintosh Time Manager to get the time: */
+
+/*
+ * make sure compiler (CW) recognizes the pascal keywords that are in
+ * Timer.h
+ */
+#pragma only_std_keywords off
+
+#include <Timer.h>
+
+#pragma only_std_keywords reset
+
+/*
+ * Read the current time with Microseconds() and return it as an
+ * fftw_time whose lo/hi members hold the two words of the reading.
+ */
+fftw_time get_Mac_microseconds(void)
+{
+     fftw_time t;
+     UnsignedWide microsec;    /*
+                                * microsec.lo and microsec.hi are
+                                * unsigned long's, and are the two parts
+                                * of a 64 bit unsigned integer
+                                */
+
+     /*
+      * get time in microseconds; the variable is passed by address so
+      * that the call can fill in both words
+      */
+     Microseconds(&microsec);
+
+     /* store lo and hi words into our structure: */
+     t.lo = microsec.lo;
+     t.hi = microsec.hi;
+
+     return t;
+}
+
+fftw_time fftw_time_diff(fftw_time t1, fftw_time t2)
+/*
+ * This function takes the difference t1 - t2 of two 64 bit
+ * integers, represented by the 32 bit lo and hi words.
+ * if t1 < t2, returns 0.
+ */
+{
+     fftw_time diff;
+
+     if (t1.hi < t2.hi) {      /* something is wrong...t1 < t2! */
+          diff.hi = diff.lo = 0;
+          return diff;
+     } else
+          diff.hi = t1.hi - t2.hi;
+
+     if (t1.lo < t2.lo) {
+          if (diff.hi > 0)
+               diff.hi -= 1;   /* borrow one unit from the high word */
+          else {               /* something is wrong...t1 < t2! */
+               diff.hi = diff.lo = 0;
+               return diff;
+          }
+     }
+     /* computed after the borrow has been taken from diff.hi */
+     diff.lo = t1.lo - t2.lo;
+
+     return diff;
+}
+
+#endif
+
+#ifdef HAVE_WIN32_TIMER
+#include <windows.h>
+
+static LARGE_INTEGER gFreq;          /* filled by QueryPerformanceFrequency */
+static int gHaveHiResTimer = 0;      /* set when the frequency query succeeds */
+static int gFirstTime = 1;           /* cleared by the first GetPerfTime call */
+
+/*
+ * Return a time stamp.  The first call probes for the performance
+ * counter; when it is available the LowPart of the counter is
+ * returned, otherwise the value of clock().
+ */
+unsigned long GetPerfTime(void)
+{
+     LARGE_INTEGER lCounter;
+
+     if (gFirstTime) {
+          gFirstTime = 0;
+
+          if (QueryPerformanceFrequency(&gFreq)) {
+               gHaveHiResTimer = 1;
+          }
+     }
+     if (gHaveHiResTimer) {
+          QueryPerformanceCounter(&lCounter);
+          return lCounter.u.LowPart;
+     } else {
+          return (unsigned long) clock();
+     }
+}
+
+/*
+ * Convert a time-stamp difference to seconds by dividing it by the
+ * counter frequency (LowPart only) or by CLOCKS_PER_SEC.  Which divisor
+ * is used depends on gHaveHiResTimer, which is only ever set inside
+ * GetPerfTime.
+ */
+double GetPerfSec(double pTime)
+{
+     if (gHaveHiResTimer) {
+          return pTime / gFreq.u.LowPart;      /* assumes HighPart==0 */
+
+     } else {
+          return pTime / CLOCKS_PER_SEC;
+     }
+}
+
+#endif                         /* HAVE_WIN32_TIMER */
+
+#if defined(FFTW_USE_GETTIMEOFDAY)
+
+/* timer support routines for systems having gettimeofday */
+
+#if defined(HAVE_BSDGETTIMEOFDAY) && ! defined(HAVE_GETTIMEOFDAY)
+#define gettimeofday BSDgettimeofday
+#endif
+
+/* Return the struct timeval filled in by gettimeofday(). */
+fftw_time fftw_gettimeofday_get_time(void)
+{
+     struct timeval tv;
+     gettimeofday(&tv, 0);
+     return tv;
+}
+
+/*
+ * Return t1 - t2, computed member by member and then normalized so
+ * that tv_usec is not negative.
+ */
+fftw_time fftw_gettimeofday_time_diff(fftw_time t1, fftw_time t2)
+{
+     fftw_time diff;
+
+     diff.tv_sec = t1.tv_sec - t2.tv_sec;
+     diff.tv_usec = t1.tv_usec - t2.tv_usec;
+     /* normalize */
+     while (diff.tv_usec < 0) {
+          diff.tv_usec += 1000000L;
+          diff.tv_sec -= 1;
+     }
+
+     return diff;
+}
+#endif
diff --git a/src/fftw/twiddle.c b/src/fftw/twiddle.c
new file mode 100644
index 0000000..16e9fd0
--- /dev/null
+++ b/src/fftw/twiddle.c
@@ -0,0 +1,218 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+/*
+ * twiddle.c -- compute twiddle factors
+ * These are the twiddle factors for *direct* fft. Flip sign to get
+ * the inverse
+ */
+
+/* $Id: twiddle.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ */
+#include "fftw-int.h"
+#include <math.h>
+#include <stdlib.h>
+#include <limits.h>
+
+#ifndef TRUE
+#define TRUE (1 == 1)
+#endif
+
+#ifndef FALSE
+#define FALSE (1 == 0)
+#endif
+
+#ifdef USE_FFTW_SAFE_MULMOD
+/*
+ * (x * y) mod p without overflowing an int.  Requires x, y >= 0 and
+ * p > 0.  Slow: when the direct product would not fit, y is split in
+ * two parts and the partial results are added.
+ */
+int fftw_safe_mulmod(int x, int y, int p)
+{
+     int lower_part, upper_part;
+
+     /* the plain product is usable when it cannot exceed INT_MAX */
+     if (y == 0 || x <= INT_MAX / y)
+          return (x * y) % p;
+
+     lower_part = y / 2;
+     upper_part = y - lower_part;
+     return (fftw_safe_mulmod(x, lower_part, p) +
+             fftw_safe_mulmod(x, upper_part, p)) % p;
+}
+#endif /* USE_FFTW_SAFE_MULMOD */
+
+/*
+ * Build the twiddle table used with Rader codelets: (n / r) rows of
+ * (r - 1) entries.  The entry in row i, column j uses the angle
+ * 2*pi/n * (i * gpow), where gpow starts at 1 and is advanced with
+ * MULMOD(gpow, g, r) after every column (MULMOD is defined elsewhere,
+ * presumably a modular product).  The array comes from fftw_malloc.
+ */
+static fftw_complex *fftw_compute_rader_twiddle(int n, int r, int g)
+{
+     FFTW_TRIG_REAL twoPiOverN = FFTW_K2PI / (FFTW_TRIG_REAL) n;
+     int rows = n / r;
+     int cols = r - 1;
+     int row, col;
+     fftw_complex *table;
+
+     table = (fftw_complex *) fftw_malloc(cols * rows * sizeof(fftw_complex));
+
+     for (row = 0; row < rows; ++row) {
+          int gpow = 1;
+
+          for (col = 0; col < cols; ++col) {
+               int slot = row * cols + col;
+               FFTW_TRIG_REAL exponent = (FFTW_TRIG_REAL) (row * gpow);
+
+               c_re(table[slot]) = FFTW_TRIG_COS(twoPiOverN * exponent);
+               c_im(table[slot]) =
+                    FFTW_FORWARD * FFTW_TRIG_SIN(twoPiOverN * exponent);
+
+               /* next power of the generator */
+               gpow = MULMOD(gpow, g, r);
+          }
+     }
+
+     return table;
+}
+
+/*
+ * compute the W coefficients (that is, powers of the root of 1)
+ * and store them into an array.
+ *
+ * With a null descriptor all n factors are produced in order.  A
+ * FFTW_RADER descriptor is handed to fftw_compute_rader_twiddle.
+ * Otherwise d->ntwiddle factors are stored per index i, using the
+ * exponents i * d->twiddle_order[j].
+ */
+static fftw_complex *fftw_compute_twiddle(int n, const fftw_codelet_desc *d)
+{
+     FFTW_TRIG_REAL twoPiOverN;
+     int i, j;
+     fftw_complex *W;
+
+     twoPiOverN = FFTW_K2PI / (FFTW_TRIG_REAL) n;
+
+     if (!d) {
+          /* generic codelet, needs all twiddles in order */
+          W = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex));
+          for (i = 0; i < n; ++i) {
+               c_re(W[i]) = FFTW_TRIG_COS(twoPiOverN * (FFTW_TRIG_REAL) i);
+               c_im(W[i]) = FFTW_FORWARD * FFTW_TRIG_SIN(twoPiOverN * (FFTW_TRIG_REAL) i);
+          }
+     } else if (d->type == FFTW_RADER)
+          W = fftw_compute_rader_twiddle(n, d->size, d->signature);
+     else {
+          int r = d->size;
+          int m = n / r, m_alloc;
+          int r1 = d->ntwiddle;
+          int istart;
+
+          if (d->type == FFTW_TWIDDLE) {
+               istart = 0;
+               m_alloc = m;
+          } else if (d->type == FFTW_HC2HC) {
+               /*
+                * This is tricky, do not change lightly.
+                * Only the indices 1 .. (m + 1) / 2 - 1 are stored.
+                */
+               m = (m + 1) / 2;
+               m_alloc = m - 1;
+               istart = 1;
+          } else {
+               fftw_die("compute_twiddle: invalid argument\n");
+               /* paranoia for gcc */
+               m_alloc = 0;
+               istart = 0;
+          }
+
+          W = (fftw_complex *) fftw_malloc(r1 * m_alloc * sizeof(fftw_complex));
+          for (i = istart; i < m; ++i)
+               for (j = 0; j < r1; ++j) {
+                    /* rows are packed starting at the first stored index */
+                    int k = (i - istart) * r1 + j;
+                    FFTW_TRIG_REAL
+                        ij = (FFTW_TRIG_REAL) (i * d->twiddle_order[j]);
+                    c_re(W[k]) = FFTW_TRIG_COS(twoPiOverN * ij);
+                    c_im(W[k]) = FFTW_FORWARD * FFTW_TRIG_SIN(twoPiOverN * ij);
+               }
+     }
+
+     return W;
+}
+
+/*
+ * these routines implement a simple reference-count-based
+ * management of twiddle structures
+ */
+static fftw_twiddle *twlist = (fftw_twiddle *) 0;
+int fftw_twiddle_size = 0;     /* total allocated size, for debugging */
+
+/* true if the two codelets can share the same twiddle factors */
+static int compatible(const fftw_codelet_desc *d1, const fftw_codelet_desc *d2)
+{
+     int i;
+
+     /* true if they are the same codelet */
+     if (d1 == d2)
+          return TRUE;
+
+     /* false if one is null and the other is not */
+     if (!d1 || !d2)
+          return FALSE;
+
+     /* false if size is different */
+     if (d1->size != d2->size)
+          return FALSE;
+
+     /* false if different types (FFTW_TWIDDLE/FFTW_HC2HC/FFTW_RADER) */
+     if (d1->type != d2->type)
+          return FALSE;
+
+     /* false if they need different # of twiddles */
+     if (d1->ntwiddle != d2->ntwiddle)
+          return FALSE;
+
+     /* false if the twiddle orders are different */
+     for (i = 0; i < d1->ntwiddle; ++i)
+          if (d1->twiddle_order[i] != d2->twiddle_order[i])
+               return FALSE;
+
+     return TRUE;
+}
+
+/*
+ * Return a twiddle structure for size n and codelet d.  An existing
+ * list entry with the same n and a compatible codelet is reused and
+ * its reference count incremented; otherwise a new entry is computed,
+ * given a reference count of 1 and pushed on the front of the list.
+ */
+fftw_twiddle *fftw_create_twiddle(int n, const fftw_codelet_desc *d)
+{
+     fftw_twiddle *tw;
+
+     /* lookup this n in the twiddle list */
+     for (tw = twlist; tw; tw = tw->next)
+          if (n == tw->n && compatible(d, tw->cdesc)) {
+               ++tw->refcnt;
+               return tw;
+          }
+     /* not found --- allocate a new struct twiddle */
+     tw = (fftw_twiddle *) fftw_malloc(sizeof(fftw_twiddle));
+     fftw_twiddle_size += n;
+
+     tw->n = n;
+     tw->cdesc = d;
+     tw->twarray = fftw_compute_twiddle(n, d);
+     tw->refcnt = 1;
+
+     /* enqueue the new struct */
+     tw->next = twlist;
+     twlist = tw;
+
+     return tw;
+}
+
+/*
+ * Drop one reference to tw.  When the count reaches zero the entry is
+ * unlinked from the list and both its array and the structure itself
+ * are freed.  Reaching the end of the list without finding tw is
+ * reported through fftw_die.
+ */
+void fftw_destroy_twiddle(fftw_twiddle * tw)
+{
+     fftw_twiddle **p;
+     --tw->refcnt;
+
+     if (tw->refcnt == 0) {
+          /*
+           * remove from the list of known twiddle factors.  p is the
+           * address of a link and is never null, so the loop must stop
+           * when the link it points to (*p) is null.
+           */
+          for (p = &twlist; *p; p = &((*p)->next))
+               if (*p == tw) {
+                    *p = tw->next;
+                    fftw_twiddle_size -= tw->n;
+                    fftw_free(tw->twarray);
+                    fftw_free(tw);
+                    return;
+               }
+          fftw_die("BUG in fftw_destroy_twiddle\n");
+     }
+}
diff --git a/src/fftw/wisdom.c b/src/fftw/wisdom.c
new file mode 100644
index 0000000..b487ea8
--- /dev/null
+++ b/src/fftw/wisdom.c
@@ -0,0 +1,317 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+/*
+ * wisdom.c -- manage the wisdom
+ */
+
+#include "fftw-int.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+
+/*
+ * One remembered planning decision.  The leading members form the
+ * lookup key; type, signature and recurse_kind are the stored answer.
+ */
+struct wisdom {
+     int n;
+     int flags;
+     fftw_direction dir;
+     enum fftw_wisdom_category category;
+     int istride;
+     int ostride;
+     int vector_size;
+     enum fftw_node_type type;         /* this is the wisdom */
+     int signature;                    /* this is the wisdom */
+     fftw_recurse_kind recurse_kind;   /* this is the wisdom */
+     struct wisdom *next;
+};
+
+/* list of wisdom */
+static struct wisdom *wisdom_list = (struct wisdom *) 0;
+
+/*
+ * Search the wisdom list for an entry matching n, flags (with
+ * FFTW_MEASURE forced on), dir, category and both strides.
+ *
+ * On a match, returns 1 after either copying the caller's
+ * type/signature/recurse_kind into the entry (replacep nonzero) or
+ * copying the entry's values out to the caller (replacep zero).
+ * Returns 0 when nothing matches or when FFTW_USE_WISDOM is not set.
+ */
+int fftw_wisdom_lookup(int n, int flags, fftw_direction dir,
+                       enum fftw_wisdom_category category,
+                       int istride, int ostride,
+                       enum fftw_node_type *type,
+                       int *signature, fftw_recurse_kind *recurse_kind,
+                       int replacep)
+{
+     struct wisdom *entry;
+
+     /* wisdom disabled: behave as if nothing had been found */
+     if ((flags & FFTW_USE_WISDOM) == 0)
+          return 0;
+
+     /* always use (only) wisdom from measurements */
+     flags |= FFTW_MEASURE;
+
+     for (entry = wisdom_list; entry; entry = entry->next) {
+          if (entry->n != n || entry->flags != flags || entry->dir != dir)
+               continue;
+          if (entry->istride != istride || entry->ostride != ostride)
+               continue;
+          if (entry->category != category)
+               continue;
+
+          /* found wisdom */
+          if (replacep) {
+               /* overwrite the stored answer with the caller's */
+               entry->type = *type;
+               entry->signature = *signature;
+               entry->recurse_kind = *recurse_kind;
+          } else {
+               /* hand the stored answer back to the caller */
+               *type = entry->type;
+               *signature = entry->signature;
+               *recurse_kind = entry->recurse_kind;
+          }
+          return 1;
+     }
+
+     return 0;
+}
+
+/*
+ * Record a planning decision.  Nothing is stored unless flags contains
+ * both FFTW_USE_WISDOM and FFTW_MEASURE.  An existing entry with the
+ * same key is overwritten in place; otherwise a new entry is put at
+ * the head of the list.  Asking for FFTW_VECTOR_RECURSE while
+ * FFTW_NO_VECTOR_RECURSE is set is reported through fftw_die.
+ */
+void fftw_wisdom_add(int n, int flags, fftw_direction dir,
+                     enum fftw_wisdom_category category,
+                     int istride, int ostride,
+                     enum fftw_node_type type,
+                     int signature,
+                     fftw_recurse_kind recurse_kind)
+{
+     struct wisdom *entry;
+
+     if (recurse_kind == FFTW_VECTOR_RECURSE &&
+         (flags & FFTW_NO_VECTOR_RECURSE))
+          fftw_die("bug in planner (conflicting plan options)\n");
+
+     /* ignored when wisdom is disabled; only measurements produce wisdom */
+     if (!(flags & FFTW_USE_WISDOM) || !(flags & FFTW_MEASURE))
+          return;
+
+     /* a lookup in replace mode updates an already existing entry */
+     if (fftw_wisdom_lookup(n, flags, dir, category, istride, ostride,
+                            &type, &signature, &recurse_kind, 1))
+          return;
+
+     entry = (struct wisdom *) fftw_malloc(sizeof(struct wisdom));
+
+     /* key */
+     entry->n = n;
+     entry->flags = flags;
+     entry->dir = dir;
+     entry->category = category;
+     entry->istride = istride;
+     entry->ostride = ostride;
+
+     /* answer */
+     entry->type = type;
+     entry->signature = signature;
+     entry->recurse_kind = recurse_kind;
+
+     /* remember this wisdom */
+     entry->next = wisdom_list;
+     wisdom_list = entry;
+}
+
+/* Free every entry of the wisdom list, leaving the list empty. */
+void fftw_forget_wisdom(void)
+{
+     struct wisdom *victim;
+
+     for (victim = wisdom_list; victim; victim = wisdom_list) {
+          /* unlink the head before releasing it */
+          wisdom_list = victim->next;
+          fftw_free(victim);
+     }
+}
+
+/*
+ * user-visible routines, to convert wisdom into strings etc.
+ */
+static const char *WISDOM_FORMAT_VERSION = "FFTW-" FFTW_VERSION;
+
+/* output handler installed by fftw_export_wisdom */
+static void (*emit) (char c, void *data);
+
+/* Send every character of s, up to the terminator, to the handler. */
+static void emit_string(const char *s, void *data)
+{
+     const char *cursor;
+
+     for (cursor = s; *cursor; ++cursor)
+          emit(*cursor, data);
+}
+
+/* Send the decimal text of n to the handler. */
+static void emit_int(int n, void *data)
+{
+     char digits[128];
+
+     sprintf(digits, "%d", n);
+     emit_string(digits, data);
+}
+
+/*
+ * dump wisdom in lisp-like format: an outer list holding the format
+ * version followed by one parenthesized list of nine integers per
+ * wisdom entry.
+ */
+void fftw_export_wisdom(void (*emitter) (char c, void *), void *data)
+{
+     struct wisdom *entry;
+
+     /* install the output handler */
+     emit = emitter;
+
+     emit('(', data);
+     emit_string(WISDOM_FORMAT_VERSION, data);
+
+     for (entry = wisdom_list; entry; entry = entry->next) {
+          int fields[9];
+          int i;
+
+          /* the order here is the order the fields are written in */
+          fields[0] = (int) entry->n;
+          fields[1] = (int) entry->flags;
+          fields[2] = (int) entry->dir;
+          fields[3] = (int) entry->category;
+          fields[4] = (int) entry->istride;
+          fields[5] = (int) entry->ostride;
+          fields[6] = (int) entry->type;
+          fields[7] = (int) entry->signature;
+          fields[8] = (int) entry->recurse_kind;
+
+          emit(' ', data);     /* separator to make the output nicer */
+          emit('(', data);
+          for (i = 0; i < 9; ++i) {
+               if (i > 0)
+                    emit(' ', data);
+               emit_int(fields[i], data);
+          }
+          emit(')', data);
+     }
+     emit(')', data);
+}
+
+/* input part */
+static int next_char;                   /* most recently read character */
+static int (*get_input) (void *data);   /* input handler */
+static fftw_status input_error;         /* FFTW_FAILURE once input went bad */
+
+/* Fetch the next character; a value of 0 or EOF flags an input error. */
+static void read_char(void *data)
+{
+     int c = get_input(data);
+
+     next_char = c;
+     if (c == 0 || c == EOF)
+          input_error = FFTW_FAILURE;
+}
+
+/* skip blanks, newlines, tabs, etc */
+static void eat_blanks(void *data)
+{
+     for (;;) {
+          if (!isspace(next_char))
+               break;
+          read_char(data);
+     }
+}
+
+/*
+ * Parse an optionally negative decimal integer, skipping blanks before
+ * it and between the minus sign and the digits.  When no digit is
+ * found, input_error is set and 0 is returned.
+ */
+static int read_int(void *data)
+{
+     int negative = 0;
+     int magnitude = 0;
+
+     eat_blanks(data);
+     if (next_char == '-') {
+          negative = 1;
+          read_char(data);
+          eat_blanks(data);
+     }
+     if (!isdigit(next_char)) {
+          /* error, no digit */
+          input_error = FFTW_FAILURE;
+          return 0;
+     }
+     do {
+          magnitude = magnitude * 10 + (next_char - '0');
+          read_char(data);
+     } while (isdigit(next_char));
+
+     return negative ? -magnitude : magnitude;
+}
+
+/*
+ * Parsing helpers for fftw_import_wisdom.  Each one makes the
+ * enclosing function return FFTW_FAILURE when the input does not
+ * match, and relies on a variable named data being in scope.  They are
+ * wrapped in do { } while (0) so that each expands to exactly one
+ * statement.
+ */
+
+/* skip blanks, then require and consume the character c */
+#define EXPECT(c) \
+do { \
+     eat_blanks(data); \
+     if (input_error == FFTW_FAILURE || \
+         next_char != (c)) \
+          return FFTW_FAILURE; \
+     read_char(data); \
+} while (0)
+
+/* read an integer into n */
+#define EXPECT_INT(n) \
+do { \
+     (n) = read_int(data); \
+     if (input_error == FFTW_FAILURE) \
+          return FFTW_FAILURE; \
+} while (0)
+
+/* require every character of the string s, in order */
+#define EXPECT_STRING(s) \
+do { \
+     const char *s1 = (s); \
+     while (*s1) { \
+          EXPECT(*s1); \
+          ++s1; \
+     } \
+} while (0)
+
+/*
+ * Read wisdom in the format written by fftw_export_wisdom, pulling
+ * characters from g(data), and pass each complete entry to
+ * fftw_wisdom_add.  Returns FFTW_SUCCESS when the closing parenthesis
+ * of the outer list is reached, FFTW_FAILURE as soon as the input does
+ * not match; entries added before the failure are not removed.
+ */
+fftw_status fftw_import_wisdom(int (*g) (void *), void *data)
+{
+     int n;
+     int flags;
+     fftw_direction dir;
+     int dir_int;
+     enum fftw_wisdom_category category;
+     int category_int;
+     enum fftw_node_type type;
+     int recurse_kind_int;
+     fftw_recurse_kind recurse_kind;
+     int type_int;
+     int signature;
+     int istride, ostride;
+
+     get_input = g;
+     input_error = FFTW_SUCCESS;
+
+     read_char(data);
+
+     eat_blanks(data);
+     EXPECT('(');
+     eat_blanks(data);
+     EXPECT_STRING(WISDOM_FORMAT_VERSION);
+     eat_blanks(data);
+
+     while (next_char != ')') {
+          EXPECT('(');
+          EXPECT_INT(n);
+          EXPECT_INT(flags);
+          /* paranoid respect for enumerated types */
+          EXPECT_INT(dir_int);
+          dir = (fftw_direction) dir_int;
+          EXPECT_INT(category_int);
+          category = (enum fftw_wisdom_category) category_int;
+          EXPECT_INT(istride);
+          EXPECT_INT(ostride);
+          EXPECT_INT(type_int);
+          type = (enum fftw_node_type) type_int;
+          EXPECT_INT(signature);
+          EXPECT_INT(recurse_kind_int);
+          recurse_kind = (fftw_recurse_kind) recurse_kind_int;
+          eat_blanks(data);
+          EXPECT(')');
+
+          /* the wisdom has been read properly. Add it */
+          fftw_wisdom_add(n, flags, dir, category,
+                          istride, ostride,
+                          type, signature, recurse_kind);
+
+          /* prepare for next morsel of wisdom */
+          eat_blanks(data);
+     }
+
+     return FFTW_SUCCESS;
+}
diff --git a/src/fftw/wisdomio.c b/src/fftw/wisdomio.c
new file mode 100644
index 0000000..a085151
--- /dev/null
+++ b/src/fftw/wisdomio.c
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "fftw-int.h"
+
+/**************** import/export using file ***************/
+
+/* output handler: write one character to the FILE passed as data */
+static void file_emitter(char c, void *data)
+{
+     putc(c, (FILE *) data);
+}
+
+/* Write the wisdom to output_file; does nothing for a null file. */
+void fftw_export_wisdom_to_file(FILE *output_file)
+{
+     if (output_file)
+          fftw_export_wisdom(file_emitter, (void *) output_file);
+}
+
+/* input handler: read one character from the FILE passed as data */
+static int file_get_input(void *data)
+{
+     return getc((FILE *) data);
+}
+
+/* Read wisdom from input_file; a null file yields FFTW_FAILURE. */
+fftw_status fftw_import_wisdom_from_file(FILE *input_file)
+{
+     if (!input_file)
+          return FFTW_FAILURE;
+     return fftw_import_wisdom(file_get_input, (void *) input_file);
+}
+
+/*************** import/export using string **************/
+
+/* output handler: count characters; data points to the int counter */
+static void emission_counter(char c, void *data)
+{
+     int *counter = (int *) data;
+
+     ++*counter;
+}
+
+/*
+ * output handler: data points to a write cursor.  Stores c, advances
+ * the cursor and keeps the text terminated after every character.
+ */
+static void string_emitter(char c, void *data)
+{
+     char **output_string = (char **) data;
+
+     *((*output_string)++) = c;
+     **output_string = 0;
+}
+
+/*
+ * Return the wisdom as a string obtained from fftw_malloc.  The
+ * export runs twice: once to count the characters and once to fill
+ * the buffer.  Returns 0 if the allocation yields a null pointer.
+ */
+char *fftw_export_wisdom_to_string(void)
+{
+     int string_length = 0;
+     char *s, *s2;
+
+     fftw_export_wisdom(emission_counter, (void *) &string_length);
+
+     s = (char *) fftw_malloc(sizeof(char) * (string_length + 1));
+     if (!s)
+          return 0;
+     s2 = s;
+
+     fftw_export_wisdom(string_emitter, (void *) &s2);
+
+     /* both passes must have produced the same number of characters */
+     if (s + string_length != s2)
+          fftw_die("Unexpected output string length!\n");
+
+     return s;
+}
+
+/*
+ * input handler: data points to a read cursor into a string.  Returns
+ * the next character and advances, or 0 at the terminator without
+ * advancing.  The character is returned as an unsigned char value so
+ * that it is never negative: the parser passes it to isspace() and
+ * isdigit() and compares it with EOF.
+ */
+static int string_get_input(void *data)
+{
+     const char **input_string = (const char **) data;
+
+     if (**input_string)
+          return (unsigned char) *((*input_string)++);
+     else
+          return 0;
+}
+
+/* Read wisdom from input_string; a null string yields FFTW_FAILURE. */
+fftw_status fftw_import_wisdom_from_string(const char *input_string)
+{
+     const char *s = input_string;
+
+     if (!input_string)
+          return FFTW_FAILURE;
+     return fftw_import_wisdom(string_get_input, (void *) &s);
+}
|