summaryrefslogtreecommitdiff
path: root/src/fftw/fftwnd.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/fftw/fftwnd.c')
-rw-r--r--src/fftw/fftwnd.c806
1 files changed, 806 insertions, 0 deletions
diff --git a/src/fftw/fftwnd.c b/src/fftw/fftwnd.c
new file mode 100644
index 0000000..57354b0
--- /dev/null
+++ b/src/fftw/fftwnd.c
@@ -0,0 +1,806 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+/* $Id: fftwnd.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ */
+
+#include "fftw-int.h"
+
+/* the number of buffers to use for buffered transforms: */
+#define FFTWND_NBUFFERS 8
+
+/* the default number of buffers to use: */
+#define FFTWND_DEFAULT_NBUFFERS 0
+
+/* the number of "padding" elements between consecutive buffer lines */
+#define FFTWND_BUFFER_PADDING 8
+
+static void destroy_plan_array(int rank, fftw_plan *plans);
+
+static void init_test_array(fftw_complex *arr, int stride, int n)
+{
+ int j;
+
+ for (j = 0; j < n; ++j) {
+ c_re(arr[stride * j]) = 0.0;
+ c_im(arr[stride * j]) = 0.0;
+ }
+}
+
+/*
+ * Same as fftw_measure_runtime, except for fftwnd plan.
+ */
+double fftwnd_measure_runtime(fftwnd_plan plan,
+ fftw_complex *in, int istride,
+ fftw_complex *out, int ostride)
+{
+ fftw_time begin, end, start;
+ double t, tmax, tmin;
+ int i, iter;
+ int n;
+ int repeat;
+
+ if (plan->rank == 0)
+ return 0.0;
+
+ n = 1;
+ for (i = 0; i < plan->rank; ++i)
+ n *= plan->n[i];
+
+ iter = 1;
+
+ for (;;) {
+ tmin = 1.0E10;
+ tmax = -1.0E10;
+ init_test_array(in, istride, n);
+
+ start = fftw_get_time();
+ /* repeat the measurement FFTW_TIME_REPEAT times */
+ for (repeat = 0; repeat < FFTW_TIME_REPEAT; ++repeat) {
+ begin = fftw_get_time();
+ for (i = 0; i < iter; ++i) {
+ fftwnd(plan, 1, in, istride, 0, out, ostride, 0);
+ }
+ end = fftw_get_time();
+
+ t = fftw_time_to_sec(fftw_time_diff(end, begin));
+ if (t < tmin)
+ tmin = t;
+ if (t > tmax)
+ tmax = t;
+
+ /* do not run for too long */
+ t = fftw_time_to_sec(fftw_time_diff(end, start));
+ if (t > FFTW_TIME_LIMIT)
+ break;
+ }
+
+ if (tmin >= FFTW_TIME_MIN)
+ break;
+
+ iter *= 2;
+ }
+
+ tmin /= (double) iter;
+ tmax /= (double) iter;
+
+ return tmin;
+}
+
+/********************** Initializing the FFTWND Plan ***********************/
+
+/* Initialize everything except for the 1D plans and the work array: */
+fftwnd_plan fftwnd_create_plan_aux(int rank, const int *n,
+ fftw_direction dir, int flags)
+{
+ int i;
+ fftwnd_plan p;
+
+ if (rank < 0)
+ return 0;
+
+ for (i = 0; i < rank; ++i)
+ if (n[i] <= 0)
+ return 0;
+
+ p = (fftwnd_plan) fftw_malloc(sizeof(fftwnd_data));
+ p->n = 0;
+ p->n_before = 0;
+ p->n_after = 0;
+ p->plans = 0;
+ p->work = 0;
+ p->dir = dir;
+
+ p->rank = rank;
+ p->is_in_place = flags & FFTW_IN_PLACE;
+
+ p->nwork = 0;
+ p->nbuffers = 0;
+
+ if (rank == 0)
+ return 0;
+
+ p->n = (int *) fftw_malloc(sizeof(int) * rank);
+ p->n_before = (int *) fftw_malloc(sizeof(int) * rank);
+ p->n_after = (int *) fftw_malloc(sizeof(int) * rank);
+ p->n_before[0] = 1;
+ p->n_after[rank - 1] = 1;
+
+ for (i = 0; i < rank; ++i) {
+ p->n[i] = n[i];
+
+ if (i) {
+ p->n_before[i] = p->n_before[i - 1] * n[i - 1];
+ p->n_after[rank - 1 - i] = p->n_after[rank - i] * n[rank - i];
+ }
+ }
+
+ return p;
+}
+
+/* create an empty new array of rank 1d plans */
+fftw_plan *fftwnd_new_plan_array(int rank)
+{
+ fftw_plan *plans;
+ int i;
+
+ plans = (fftw_plan *) fftw_malloc(rank * sizeof(fftw_plan));
+ if (!plans)
+ return 0;
+ for (i = 0; i < rank; ++i)
+ plans[i] = 0;
+ return plans;
+}
+
+/*
+ * create an array of plans using the ordinary 1d fftw_create_plan,
+ * which allocates its own array and creates plans optimized for
+ * contiguous data.
+ */
+fftw_plan *fftwnd_create_plans_generic(fftw_plan *plans,
+ int rank, const int *n,
+ fftw_direction dir, int flags)
+{
+ if (rank <= 0)
+ return 0;
+
+ if (plans) {
+ int i, j;
+ int cur_flags;
+
+ for (i = 0; i < rank; ++i) {
+ if (i < rank - 1 || (flags & FFTW_IN_PLACE)) {
+ /*
+ * fft's except the last dimension are always in-place
+ */
+ cur_flags = flags | FFTW_IN_PLACE;
+ for (j = i - 1; j >= 0 && n[i] != n[j]; --j);
+ } else {
+ cur_flags = flags;
+ /*
+ * we must create a separate plan for the last
+ * dimension
+ */
+ j = -1;
+ }
+
+ if (j >= 0) {
+ /*
+ * If a plan already exists for this size
+ * array, reuse it:
+ */
+ plans[i] = plans[j];
+ } else {
+ /* generate a new plan: */
+ plans[i] = fftw_create_plan(n[i], dir, cur_flags);
+ if (!plans[i]) {
+ destroy_plan_array(rank, plans);
+ return 0;
+ }
+ }
+ }
+ }
+ return plans;
+}
+
+static int get_maxdim(int rank, const int *n, int flags)
+{
+ int i;
+ int maxdim = 0;
+
+ for (i = 0; i < rank - 1; ++i)
+ if (n[i] > maxdim)
+ maxdim = n[i];
+ if (rank > 0 && flags & FFTW_IN_PLACE && n[rank - 1] > maxdim)
+ maxdim = n[rank - 1];
+
+ return maxdim;
+}
+
+/* compute number of elements required for work array (has to
+ be big enough to hold ncopies of the largest dimension in
+ n that will need an in-place transform. */
+int fftwnd_work_size(int rank, const int *n, int flags, int ncopies)
+{
+ return (ncopies * get_maxdim(rank, n, flags)
+ + (ncopies - 1) * FFTWND_BUFFER_PADDING);
+}
+
+/*
+ * create plans using the fftw_create_plan_specific planner, which
+ * allows us to create plans for each dimension that are specialized
+ * for the strides that we are going to use.
+ */
+fftw_plan *fftwnd_create_plans_specific(fftw_plan *plans,
+ int rank, const int *n,
+ const int *n_after,
+ fftw_direction dir, int flags,
+ fftw_complex *in, int istride,
+ fftw_complex *out, int ostride)
+{
+ if (rank <= 0)
+ return 0;
+
+ if (plans) {
+ int i, stride, cur_flags;
+ fftw_complex *work = 0;
+ int nwork;
+
+ nwork = fftwnd_work_size(rank, n, flags, 1);
+ if (nwork)
+ work = (fftw_complex*)fftw_malloc(nwork * sizeof(fftw_complex));
+
+ for (i = 0; i < rank; ++i) {
+ /* fft's except the last dimension are always in-place */
+ if (i < rank - 1)
+ cur_flags = flags | FFTW_IN_PLACE;
+ else
+ cur_flags = flags;
+
+ /* stride for transforming ith dimension */
+ stride = n_after[i];
+
+ if (cur_flags & FFTW_IN_PLACE)
+ plans[i] = fftw_create_plan_specific(n[i], dir, cur_flags,
+ in, istride * stride,
+ work, 1);
+ else
+ plans[i] = fftw_create_plan_specific(n[i], dir, cur_flags,
+ in, istride * stride,
+ out, ostride * stride);
+ if (!plans[i]) {
+ destroy_plan_array(rank, plans);
+ fftw_free(work);
+ return 0;
+ }
+ }
+
+ if (work)
+ fftw_free(work);
+ }
+ return plans;
+}
+
+/*
+ * Create an fftwnd_plan specialized for specific arrays. (These
+ * arrays are ignored, however, if they are NULL or if the flags do
+ * not include FFTW_MEASURE.) The main advantage of being provided
+ * arrays like this is that we can do runtime timing measurements of
+ * our options, without worrying about allocating excessive scratch
+ * space.
+ */
+fftwnd_plan fftwnd_create_plan_specific(int rank, const int *n,
+ fftw_direction dir, int flags,
+ fftw_complex *in, int istride,
+ fftw_complex *out, int ostride)
+{
+ fftwnd_plan p;
+
+ if (!(p = fftwnd_create_plan_aux(rank, n, dir, flags)))
+ return 0;
+
+ if (!(flags & FFTW_MEASURE) || in == 0
+ || (!p->is_in_place && out == 0)) {
+
+/**** use default plan ****/
+
+ p->plans = fftwnd_create_plans_generic(fftwnd_new_plan_array(rank),
+ rank, n, dir, flags);
+ if (!p->plans) {
+ fftwnd_destroy_plan(p);
+ return 0;
+ }
+ if (flags & FFTWND_FORCE_BUFFERED)
+ p->nbuffers = FFTWND_NBUFFERS;
+ else
+ p->nbuffers = FFTWND_DEFAULT_NBUFFERS;
+
+ p->nwork = fftwnd_work_size(rank, n, flags, p->nbuffers + 1);
+ if (p->nwork && !(flags & FFTW_THREADSAFE)) {
+ p->work = (fftw_complex*) fftw_malloc(p->nwork
+ * sizeof(fftw_complex));
+ if (!p->work) {
+ fftwnd_destroy_plan(p);
+ return 0;
+ }
+ }
+ } else {
+/**** use runtime measurements to pick plan ****/
+
+ fftw_plan *plans_buf, *plans_nobuf;
+ double t_buf, t_nobuf;
+
+ p->nwork = fftwnd_work_size(rank, n, flags, FFTWND_NBUFFERS + 1);
+ if (p->nwork && !(flags & FFTW_THREADSAFE)) {
+ p->work = (fftw_complex*) fftw_malloc(p->nwork
+ * sizeof(fftw_complex));
+ if (!p->work) {
+ fftwnd_destroy_plan(p);
+ return 0;
+ }
+ }
+ else
+ p->work = (fftw_complex*) NULL;
+
+ /* two possible sets of 1D plans: */
+ plans_buf = fftwnd_create_plans_generic(fftwnd_new_plan_array(rank),
+ rank, n, dir, flags);
+ plans_nobuf =
+ fftwnd_create_plans_specific(fftwnd_new_plan_array(rank),
+ rank, n, p->n_after, dir,
+ flags, in, istride,
+ out, ostride);
+ if (!plans_buf || !plans_nobuf) {
+ destroy_plan_array(rank, plans_nobuf);
+ destroy_plan_array(rank, plans_buf);
+ fftwnd_destroy_plan(p);
+ return 0;
+ }
+ /* time the two possible plans */
+ p->plans = plans_nobuf;
+ p->nbuffers = 0;
+ p->nwork = fftwnd_work_size(rank, n, flags, p->nbuffers + 1);
+ t_nobuf = fftwnd_measure_runtime(p, in, istride, out, ostride);
+ p->plans = plans_buf;
+ p->nbuffers = FFTWND_NBUFFERS;
+ p->nwork = fftwnd_work_size(rank, n, flags, p->nbuffers + 1);
+ t_buf = fftwnd_measure_runtime(p, in, istride, out, ostride);
+
+ /* pick the better one: */
+ if (t_nobuf < t_buf) { /* use unbuffered transform */
+ p->plans = plans_nobuf;
+ p->nbuffers = 0;
+
+ /* work array is unnecessarily large */
+ if (p->work)
+ fftw_free(p->work);
+ p->work = 0;
+
+ destroy_plan_array(rank, plans_buf);
+
+ /* allocate a work array of the correct size: */
+ p->nwork = fftwnd_work_size(rank, n, flags, p->nbuffers + 1);
+ if (p->nwork && !(flags & FFTW_THREADSAFE)) {
+ p->work = (fftw_complex*) fftw_malloc(p->nwork
+ * sizeof(fftw_complex));
+ if (!p->work) {
+ fftwnd_destroy_plan(p);
+ return 0;
+ }
+ }
+ } else { /* use buffered transform */
+ destroy_plan_array(rank, plans_nobuf);
+ }
+ }
+
+ return p;
+}
+
+fftwnd_plan fftw2d_create_plan_specific(int nx, int ny,
+ fftw_direction dir, int flags,
+ fftw_complex *in, int istride,
+ fftw_complex *out, int ostride)
+{
+ int n[2];
+
+ n[0] = nx;
+ n[1] = ny;
+
+ return fftwnd_create_plan_specific(2, n, dir, flags,
+ in, istride, out, ostride);
+}
+
+fftwnd_plan fftw3d_create_plan_specific(int nx, int ny, int nz,
+ fftw_direction dir, int flags,
+ fftw_complex *in, int istride,
+ fftw_complex *out, int ostride)
+{
+ int n[3];
+
+ n[0] = nx;
+ n[1] = ny;
+ n[2] = nz;
+
+ return fftwnd_create_plan_specific(3, n, dir, flags,
+ in, istride, out, ostride);
+}
+
+/* Create a generic fftwnd plan: */
+
+fftwnd_plan fftwnd_create_plan(int rank, const int *n,
+ fftw_direction dir, int flags)
+{
+ return fftwnd_create_plan_specific(rank, n, dir, flags, 0, 1, 0, 1);
+}
+
+fftwnd_plan fftw2d_create_plan(int nx, int ny,
+ fftw_direction dir, int flags)
+{
+ return fftw2d_create_plan_specific(nx, ny, dir, flags, 0, 1, 0, 1);
+}
+
+fftwnd_plan fftw3d_create_plan(int nx, int ny, int nz,
+ fftw_direction dir, int flags)
+{
+ return fftw3d_create_plan_specific(nx, ny, nz, dir, flags, 0, 1, 0, 1);
+}
+
+/************************ Freeing the FFTWND Plan ************************/
+
+static void destroy_plan_array(int rank, fftw_plan *plans)
+{
+ if (plans) {
+ int i, j;
+
+ for (i = 0; i < rank; ++i) {
+ for (j = i - 1;
+ j >= 0 && plans[i] != plans[j];
+ --j);
+ if (j < 0 && plans[i])
+ fftw_destroy_plan(plans[i]);
+ }
+ fftw_free(plans);
+ }
+}
+
+void fftwnd_destroy_plan(fftwnd_plan plan)
+{
+ if (plan) {
+ destroy_plan_array(plan->rank, plan->plans);
+
+ if (plan->n)
+ fftw_free(plan->n);
+
+ if (plan->n_before)
+ fftw_free(plan->n_before);
+
+ if (plan->n_after)
+ fftw_free(plan->n_after);
+
+ if (plan->work)
+ fftw_free(plan->work);
+
+ fftw_free(plan);
+ }
+}
+
+/************************ Printing the FFTWND Plan ************************/
+
+void fftwnd_fprint_plan(FILE *f, fftwnd_plan plan)
+{
+ if (plan) {
+ int i, j;
+
+ if (plan->rank == 0) {
+ fprintf(f, "plan for rank 0 (null) transform.\n");
+ return;
+ }
+ fprintf(f, "plan for ");
+ for (i = 0; i < plan->rank; ++i)
+ fprintf(f, "%s%d", i ? "x" : "", plan->n[i]);
+ fprintf(f, " transform:\n");
+
+ if (plan->nbuffers > 0)
+ fprintf(f, " -- using buffered transforms (%d buffers)\n",
+ plan->nbuffers);
+ else
+ fprintf(f, " -- using unbuffered transform\n");
+
+ for (i = 0; i < plan->rank; ++i) {
+ fprintf(f, "* dimension %d (size %d) ", i, plan->n[i]);
+
+ for (j = i - 1; j >= 0; --j)
+ if (plan->plans[j] == plan->plans[i])
+ break;
+
+ if (j < 0)
+ fftw_fprint_plan(f, plan->plans[i]);
+ else
+ fprintf(f, "plan is same as dimension %d plan.\n", j);
+ }
+ }
+}
+
+void fftwnd_print_plan(fftwnd_plan plan)
+{
+ fftwnd_fprint_plan(stdout, plan);
+}
+
+/********************* Buffered FFTW (in-place) *********************/
+
+void fftw_buffered(fftw_plan p, int howmany,
+ fftw_complex *in, int istride, int idist,
+ fftw_complex *work,
+ int nbuffers, fftw_complex *buffers)
+{
+ int i = 0, n, nb;
+
+ n = p->n;
+ nb = n + FFTWND_BUFFER_PADDING;
+
+ do {
+ for (; i <= howmany - nbuffers; i += nbuffers) {
+ fftw_complex *cur_in = in + i * idist;
+ int j, buf;
+
+ /*
+ * First, copy nbuffers strided arrays to the
+ * contiguous buffer arrays (reading consecutive
+ * locations, assuming that idist is 1):
+ */
+ for (j = 0; j < n; ++j) {
+ fftw_complex *cur_in2 = cur_in + j * istride;
+ fftw_complex *cur_buffers = buffers + j;
+
+ for (buf = 0; buf <= nbuffers - 4; buf += 4) {
+ fftw_real r0, i0, r1, i1, r2, i2, r3, i3;
+ r0 = c_re(cur_in2[0]);
+ i0 = c_im(cur_in2[0]);
+ r1 = c_re(cur_in2[idist]);
+ i1 = c_im(cur_in2[idist]);
+ r2 = c_re(cur_in2[2 * idist]);
+ i2 = c_im(cur_in2[2 * idist]);
+ r3 = c_re(cur_in2[3 * idist]);
+ i3 = c_im(cur_in2[3 * idist]);
+ c_re(cur_buffers[0]) = r0;
+ c_im(cur_buffers[0]) = i0;
+ c_re(cur_buffers[nb]) = r1;
+ c_im(cur_buffers[nb]) = i1;
+ c_re(cur_buffers[2 * nb]) = r2;
+ c_im(cur_buffers[2 * nb]) = i2;
+ c_re(cur_buffers[3 * nb]) = r3;
+ c_im(cur_buffers[3 * nb]) = i3;
+ cur_buffers += 4 * nb;
+ cur_in2 += 4 * idist;
+ }
+ for (; buf < nbuffers; ++buf) {
+ *cur_buffers = *cur_in2;
+ cur_buffers += nb;
+ cur_in2 += idist;
+ }
+ }
+
+ /*
+ * Now, compute the FFTs in the buffers (in-place
+ * using work):
+ */
+ fftw(p, nbuffers, buffers, 1, nb, work, 1, 0);
+
+ /*
+ * Finally, copy the results back from the contiguous
+ * buffers to the strided arrays (writing consecutive
+ * locations):
+ */
+ for (j = 0; j < n; ++j) {
+ fftw_complex *cur_in2 = cur_in + j * istride;
+ fftw_complex *cur_buffers = buffers + j;
+
+ for (buf = 0; buf <= nbuffers - 4; buf += 4) {
+ fftw_real r0, i0, r1, i1, r2, i2, r3, i3;
+ r0 = c_re(cur_buffers[0]);
+ i0 = c_im(cur_buffers[0]);
+ r1 = c_re(cur_buffers[nb]);
+ i1 = c_im(cur_buffers[nb]);
+ r2 = c_re(cur_buffers[2 * nb]);
+ i2 = c_im(cur_buffers[2 * nb]);
+ r3 = c_re(cur_buffers[3 * nb]);
+ i3 = c_im(cur_buffers[3 * nb]);
+ c_re(cur_in2[0]) = r0;
+ c_im(cur_in2[0]) = i0;
+ c_re(cur_in2[idist]) = r1;
+ c_im(cur_in2[idist]) = i1;
+ c_re(cur_in2[2 * idist]) = r2;
+ c_im(cur_in2[2 * idist]) = i2;
+ c_re(cur_in2[3 * idist]) = r3;
+ c_im(cur_in2[3 * idist]) = i3;
+ cur_buffers += 4 * nb;
+ cur_in2 += 4 * idist;
+ }
+ for (; buf < nbuffers; ++buf) {
+ *cur_in2 = *cur_buffers;
+ cur_buffers += nb;
+ cur_in2 += idist;
+ }
+ }
+ }
+
+ /*
+ * we skip howmany % nbuffers ffts at the end of the loop,
+ * so we have to go back and do them:
+ */
+ nbuffers = howmany - i;
+ } while (i < howmany);
+}
+
+/********************* Computing the N-Dimensional FFT *********************/
+
+void fftwnd_aux(fftwnd_plan p, int cur_dim,
+ fftw_complex *in, int istride,
+ fftw_complex *out, int ostride,
+ fftw_complex *work)
+{
+ int n_after = p->n_after[cur_dim], n = p->n[cur_dim];
+
+ if (cur_dim == p->rank - 2) {
+ /* just do the last dimension directly: */
+ if (p->is_in_place)
+ fftw(p->plans[p->rank - 1], n,
+ in, istride, n_after * istride,
+ work, 1, 0);
+ else
+ fftw(p->plans[p->rank - 1], n,
+ in, istride, n_after * istride,
+ out, ostride, n_after * ostride);
+ } else { /* we have at least two dimensions to go */
+ int i;
+
+ /*
+ * process the subsequent dimensions recursively, in hyperslabs,
+ * to get maximum locality:
+ */
+ for (i = 0; i < n; ++i)
+ fftwnd_aux(p, cur_dim + 1,
+ in + i * n_after * istride, istride,
+ out + i * n_after * ostride, ostride, work);
+ }
+
+ /* do the current dimension (in-place): */
+ if (p->nbuffers == 0) {
+ fftw(p->plans[cur_dim], n_after,
+ out, n_after * ostride, ostride,
+ work, 1, 0);
+ } else /* using contiguous copy buffers: */
+ fftw_buffered(p->plans[cur_dim], n_after,
+ out, n_after * ostride, ostride,
+ work, p->nbuffers, work + n);
+}
+
+/*
+ * alternate version of fftwnd_aux -- this version pushes the howmany
+ * loop down to the leaves of the computation, for greater locality in
+ * cases where dist < stride
+ */
+void fftwnd_aux_howmany(fftwnd_plan p, int cur_dim,
+ int howmany,
+ fftw_complex *in, int istride, int idist,
+ fftw_complex *out, int ostride, int odist,
+ fftw_complex *work)
+{
+ int n_after = p->n_after[cur_dim], n = p->n[cur_dim];
+ int k;
+
+ if (cur_dim == p->rank - 2) {
+ /* just do the last dimension directly: */
+ if (p->is_in_place)
+ for (k = 0; k < n; ++k)
+ fftw(p->plans[p->rank - 1], howmany,
+ in + k * n_after * istride, istride, idist,
+ work, 1, 0);
+ else
+ for (k = 0; k < n; ++k)
+ fftw(p->plans[p->rank - 1], howmany,
+ in + k * n_after * istride, istride, idist,
+ out + k * n_after * ostride, ostride, odist);
+ } else { /* we have at least two dimensions to go */
+ int i;
+
+ /*
+ * process the subsequent dimensions recursively, in
+ * hyperslabs, to get maximum locality:
+ */
+ for (i = 0; i < n; ++i)
+ fftwnd_aux_howmany(p, cur_dim + 1, howmany,
+ in + i * n_after * istride, istride, idist,
+ out + i * n_after * ostride, ostride, odist,
+ work);
+ }
+
+ /* do the current dimension (in-place): */
+ if (p->nbuffers == 0)
+ for (k = 0; k < n_after; ++k)
+ fftw(p->plans[cur_dim], howmany,
+ out + k * ostride, n_after * ostride, odist,
+ work, 1, 0);
+ else /* using contiguous copy buffers: */
+ for (k = 0; k < n_after; ++k)
+ fftw_buffered(p->plans[cur_dim], howmany,
+ out + k * ostride, n_after * ostride, odist,
+ work, p->nbuffers, work + n);
+}
+
+void fftwnd(fftwnd_plan p, int howmany,
+ fftw_complex *in, int istride, int idist,
+ fftw_complex *out, int ostride, int odist)
+{
+ fftw_complex *work;
+
+#ifdef FFTW_DEBUG
+ if (p->rank > 0 && (p->plans[0]->flags & FFTW_THREADSAFE)
+ && p->nwork && p->work)
+ fftw_die("bug with FFTW_THREADSAFE flag\n");
+#endif
+
+ if (p->nwork && !p->work)
+ work = (fftw_complex *) fftw_malloc(p->nwork * sizeof(fftw_complex));
+ else
+ work = p->work;
+
+ switch (p->rank) {
+ case 0:
+ break;
+ case 1:
+ if (p->is_in_place) /* fft is in-place */
+ fftw(p->plans[0], howmany, in, istride, idist,
+ work, 1, 0);
+ else
+ fftw(p->plans[0], howmany, in, istride, idist,
+ out, ostride, odist);
+ break;
+ default: /* rank >= 2 */
+ {
+ if (p->is_in_place) {
+ out = in;
+ ostride = istride;
+ odist = idist;
+ }
+ if (howmany > 1 && odist < ostride)
+ fftwnd_aux_howmany(p, 0, howmany,
+ in, istride, idist,
+ out, ostride, odist,
+ work);
+ else {
+ int i;
+
+ for (i = 0; i < howmany; ++i)
+ fftwnd_aux(p, 0,
+ in + i * idist, istride,
+ out + i * odist, ostride,
+ work);
+ }
+ }
+ }
+
+ if (p->nwork && !p->work)
+ fftw_free(work);
+
+}
+
+void fftwnd_one(fftwnd_plan p, fftw_complex *in, fftw_complex *out)
+{
+ fftwnd(p, 1, in, 1, 1, out, 1, 1);
+}