summaryrefslogtreecommitdiff
path: root/src/fftw/rader.c
diff options
context:
space:
mode:
authorscuri <scuri>2008-10-17 06:10:15 +0000
committerscuri <scuri>2008-10-17 06:10:15 +0000
commit5a422aba704c375a307a902bafe658342e209906 (patch)
tree5005011e086bb863d8fb587ad3319bbec59b2447 /src/fftw/rader.c
First commit - moving from LuaForge to SourceForge
Diffstat (limited to 'src/fftw/rader.c')
-rw-r--r--src/fftw/rader.c365
1 files changed, 365 insertions, 0 deletions
diff --git a/src/fftw/rader.c b/src/fftw/rader.c
new file mode 100644
index 0000000..156529b
--- /dev/null
+++ b/src/fftw/rader.c
@@ -0,0 +1,365 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+/*
+ * Compute transforms of prime sizes using Rader's trick: turn them
+ * into convolutions of size n - 1, which you then perform via a pair
+ * of FFTs.
+ */
+
+#include <stdlib.h>
+#include <math.h>
+
+#include "fftw-int.h"
+
+#ifdef FFTW_DEBUG
+#define WHEN_DEBUG(a) a
+#else
+#define WHEN_DEBUG(a)
+#endif
+
+/* compute n^m mod p, where m >= 0 and p > 0. */
+static int power_mod(int n, int m, int p)
+{
+ if (m == 0)
+ return 1;
+ else if (m % 2 == 0) {
+ int x = power_mod(n, m / 2, p);
+ return MULMOD(x, x, p);
+ }
+ else
+ return MULMOD(n, power_mod(n, m - 1, p), p);
+}
+
+/*
+ * Find the period of n in the multiplicative group mod p (p prime).
+ * That is, return the smallest m such that n^m == 1 mod p.
+ */
+static int period(int n, int p)
+{
+ int prod = n, period = 1;
+
+ while (prod != 1) {
+ prod = MULMOD(prod, n, p);
+ ++period;
+ if (prod == 0)
+ fftw_die("non-prime order in Rader\n");
+ }
+ return period;
+}
+
+/* find a generator for the multiplicative group mod p, where p is prime */
+static int find_generator(int p)
+{
+ int g;
+
+ for (g = 1; g < p; ++g)
+ if (period(g, p) == p - 1)
+ break;
+ if (g == p)
+ fftw_die("couldn't find generator for Rader\n");
+ return g;
+}
+
+/***************************************************************************/
+
+static fftw_rader_data *create_rader_aux(int p, int flags)
+{
+ fftw_complex *omega, *work;
+ int g, ginv, gpower;
+ int i;
+ FFTW_TRIG_REAL twoPiOverN;
+ fftw_real scale = 1.0 / (p - 1); /* for convolution */
+ fftw_plan plan;
+ fftw_rader_data *d;
+
+ if (p < 2)
+ fftw_die("non-prime order in Rader\n");
+
+ flags &= ~FFTW_IN_PLACE;
+
+ d = (fftw_rader_data *) fftw_malloc(sizeof(fftw_rader_data));
+
+ g = find_generator(p);
+ ginv = power_mod(g, p - 2, p);
+
+ omega = (fftw_complex *) fftw_malloc((p - 1) * sizeof(fftw_complex));
+
+ plan = fftw_create_plan(p - 1, FFTW_FORWARD,
+ flags & ~FFTW_NO_VECTOR_RECURSE);
+
+ work = (fftw_complex *) fftw_malloc((p - 1) * sizeof(fftw_complex));
+
+ twoPiOverN = FFTW_K2PI / (FFTW_TRIG_REAL) p;
+ gpower = 1;
+ for (i = 0; i < p - 1; ++i) {
+ c_re(work[i]) = scale * FFTW_TRIG_COS(twoPiOverN * gpower);
+ c_im(work[i]) = FFTW_FORWARD * scale * FFTW_TRIG_SIN(twoPiOverN
+ * gpower);
+ gpower = MULMOD(gpower, ginv, p);
+ }
+
+ /* fft permuted roots of unity */
+ fftw_executor_simple(p - 1, work, omega, plan->root, 1, 1,
+ plan->recurse_kind);
+
+ fftw_free(work);
+
+ d->plan = plan;
+ d->omega = omega;
+ d->g = g;
+ d->ginv = ginv;
+ d->p = p;
+ d->flags = flags;
+ d->refcount = 1;
+ d->next = NULL;
+
+ d->cdesc = (fftw_codelet_desc *) fftw_malloc(sizeof(fftw_codelet_desc));
+ d->cdesc->name = NULL;
+ d->cdesc->codelet = NULL;
+ d->cdesc->size = p;
+ d->cdesc->dir = FFTW_FORWARD;
+ d->cdesc->type = FFTW_RADER;
+ d->cdesc->signature = g;
+ d->cdesc->ntwiddle = 0;
+ d->cdesc->twiddle_order = NULL;
+ return d;
+}
+
+/***************************************************************************/
+
+static fftw_rader_data *fftw_create_rader(int p, int flags)
+{
+ fftw_rader_data *d = fftw_rader_top;
+
+ flags &= ~FFTW_IN_PLACE;
+ while (d && (d->p != p || d->flags != flags))
+ d = d->next;
+ if (d) {
+ d->refcount++;
+ return d;
+ }
+ d = create_rader_aux(p, flags);
+ d->next = fftw_rader_top;
+ fftw_rader_top = d;
+ return d;
+}
+
+/***************************************************************************/
+
+/* Compute the prime FFTs, premultiplied by twiddle factors. Below, we
+ * extensively use the identity that fft(x*)* = ifft(x) in order to
+ * share data between forward and backward transforms and to obviate
+ * the necessity of having separate forward and backward plans. */
+
+void fftw_twiddle_rader(fftw_complex *A, const fftw_complex *W,
+ int m, int r, int stride,
+ fftw_rader_data * d)
+{
+ fftw_complex *tmp = (fftw_complex *)
+ fftw_malloc((r - 1) * sizeof(fftw_complex));
+ int i, k, gpower = 1, g = d->g, ginv = d->ginv;
+ fftw_real a0r, a0i;
+ fftw_complex *omega = d->omega;
+
+ for (i = 0; i < m; ++i, A += stride, W += r - 1) {
+ /*
+ * Here, we fft W[k-1] * A[k*(m*stride)], using Rader.
+ * (Actually, W is pre-permuted to match the permutation that we
+ * will do on A.)
+ */
+
+ /* First, permute the input and multiply by W, storing in tmp: */
+ /* gpower == g^k mod r in the following loop */
+ for (k = 0; k < r - 1; ++k, gpower = MULMOD(gpower, g, r)) {
+ fftw_real rA, iA, rW, iW;
+ rW = c_re(W[k]);
+ iW = c_im(W[k]);
+ rA = c_re(A[gpower * (m * stride)]);
+ iA = c_im(A[gpower * (m * stride)]);
+ c_re(tmp[k]) = rW * rA - iW * iA;
+ c_im(tmp[k]) = rW * iA + iW * rA;
+ }
+
+ WHEN_DEBUG( {
+ if (gpower != 1)
+ fftw_die("incorrect generator in Rader\n");
+ }
+ );
+
+ /* FFT tmp to A: */
+ fftw_executor_simple(r - 1, tmp, A + (m * stride),
+ d->plan->root, 1, m * stride,
+ d->plan->recurse_kind);
+
+ /* set output DC component: */
+ a0r = c_re(A[0]);
+ a0i = c_im(A[0]);
+ c_re(A[0]) += c_re(A[(m * stride)]);
+ c_im(A[0]) += c_im(A[(m * stride)]);
+
+ /* now, multiply by omega: */
+ for (k = 0; k < r - 1; ++k) {
+ fftw_real rA, iA, rW, iW;
+ rW = c_re(omega[k]);
+ iW = c_im(omega[k]);
+ rA = c_re(A[(k + 1) * (m * stride)]);
+ iA = c_im(A[(k + 1) * (m * stride)]);
+ c_re(A[(k + 1) * (m * stride)]) = rW * rA - iW * iA;
+ c_im(A[(k + 1) * (m * stride)]) = -(rW * iA + iW * rA);
+ }
+
+ /* this will add A[0] to all of the outputs after the ifft */
+ c_re(A[(m * stride)]) += a0r;
+ c_im(A[(m * stride)]) -= a0i;
+
+ /* inverse FFT: */
+ fftw_executor_simple(r - 1, A + (m * stride), tmp,
+ d->plan->root, m * stride, 1,
+ d->plan->recurse_kind);
+
+ /* finally, do inverse permutation to unshuffle the output: */
+ for (k = 0; k < r - 1; ++k, gpower = MULMOD(gpower, ginv, r)) {
+ c_re(A[gpower * (m * stride)]) = c_re(tmp[k]);
+ c_im(A[gpower * (m * stride)]) = -c_im(tmp[k]);
+ }
+
+ WHEN_DEBUG( {
+ if (gpower != 1)
+ fftw_die("incorrect generator in Rader\n");
+ }
+ );
+
+ }
+
+ fftw_free(tmp);
+}
+
+void fftwi_twiddle_rader(fftw_complex *A, const fftw_complex *W,
+ int m, int r, int stride,
+ fftw_rader_data * d)
+{
+ fftw_complex *tmp = (fftw_complex *)
+ fftw_malloc((r - 1) * sizeof(fftw_complex));
+ int i, k, gpower = 1, g = d->g, ginv = d->ginv;
+ fftw_real a0r, a0i;
+ fftw_complex *omega = d->omega;
+
+ for (i = 0; i < m; ++i, A += stride, W += r - 1) {
+ /*
+ * Here, we fft W[k-1]* * A[k*(m*stride)], using Rader.
+ * (Actually, W is pre-permuted to match the permutation that
+ * we will do on A.)
+ */
+
+ /* First, permute the input and multiply by W*, storing in tmp: */
+ /* gpower == g^k mod r in the following loop */
+ for (k = 0; k < r - 1; ++k, gpower = MULMOD(gpower, g, r)) {
+ fftw_real rA, iA, rW, iW;
+ rW = c_re(W[k]);
+ iW = c_im(W[k]);
+ rA = c_re(A[gpower * (m * stride)]);
+ iA = c_im(A[gpower * (m * stride)]);
+ c_re(tmp[k]) = rW * rA + iW * iA;
+ c_im(tmp[k]) = iW * rA - rW * iA;
+ }
+
+ WHEN_DEBUG( {
+ if (gpower != 1)
+ fftw_die("incorrect generator in Rader\n");
+ }
+ );
+
+ /* FFT tmp to A: */
+ fftw_executor_simple(r - 1, tmp, A + (m * stride),
+ d->plan->root, 1, m * stride,
+ d->plan->recurse_kind);
+
+ /* set output DC component: */
+ a0r = c_re(A[0]);
+ a0i = c_im(A[0]);
+ c_re(A[0]) += c_re(A[(m * stride)]);
+ c_im(A[0]) -= c_im(A[(m * stride)]);
+
+ /* now, multiply by omega: */
+ for (k = 0; k < r - 1; ++k) {
+ fftw_real rA, iA, rW, iW;
+ rW = c_re(omega[k]);
+ iW = c_im(omega[k]);
+ rA = c_re(A[(k + 1) * (m * stride)]);
+ iA = c_im(A[(k + 1) * (m * stride)]);
+ c_re(A[(k + 1) * (m * stride)]) = rW * rA - iW * iA;
+ c_im(A[(k + 1) * (m * stride)]) = -(rW * iA + iW * rA);
+ }
+
+ /* this will add A[0] to all of the outputs after the ifft */
+ c_re(A[(m * stride)]) += a0r;
+ c_im(A[(m * stride)]) += a0i;
+
+ /* inverse FFT: */
+ fftw_executor_simple(r - 1, A + (m * stride), tmp,
+ d->plan->root, m * stride, 1,
+ d->plan->recurse_kind);
+
+ /* finally, do inverse permutation to unshuffle the output: */
+ for (k = 0; k < r - 1; ++k, gpower = MULMOD(gpower, ginv, r)) {
+ A[gpower * (m * stride)] = tmp[k];
+ }
+
+ WHEN_DEBUG( {
+ if (gpower != 1)
+ fftw_die("incorrect generator in Rader\n");
+ }
+ );
+ }
+
+ fftw_free(tmp);
+}
+
+/***************************************************************************/
+
+/*
+ * Make an FFTW_RADER plan node. Note that this function must go
+ * here, rather than in putils.c, because it indirectly calls the
+ * fftw_planner. If we included it in putils.c, which is also used
+ * by rfftw, then any program using rfftw would be linked with all
+ * of the FFTW codelets, even if they were not needed. I wish that the
+ * darn linkers operated on a function rather than a file granularity.
+ */
+fftw_plan_node *fftw_make_node_rader(int n, int size, fftw_direction dir,
+ fftw_plan_node *recurse,
+ int flags)
+{
+ fftw_plan_node *p = fftw_make_node();
+
+ p->type = FFTW_RADER;
+ p->nodeu.rader.size = size;
+ p->nodeu.rader.codelet = dir == FFTW_FORWARD ?
+ fftw_twiddle_rader : fftwi_twiddle_rader;
+ p->nodeu.rader.rader_data = fftw_create_rader(size, flags);
+ p->nodeu.rader.recurse = recurse;
+ fftw_use_node(recurse);
+
+ if (flags & FFTW_MEASURE)
+ p->nodeu.rader.tw =
+ fftw_create_twiddle(n, p->nodeu.rader.rader_data->cdesc);
+ else
+ p->nodeu.rader.tw = 0;
+ return p;
+}