diff options
author | scuri <scuri> | 2008-10-17 06:10:15 +0000 |
---|---|---|
committer | scuri <scuri> | 2008-10-17 06:10:15 +0000 |
commit | 5a422aba704c375a307a902bafe658342e209906 (patch) | |
tree | 5005011e086bb863d8fb587ad3319bbec59b2447 /src/fftw/planner.c |
First commit - moving from LuaForge to SourceForge
Diffstat (limited to 'src/fftw/planner.c')
-rw-r--r-- | src/fftw/planner.c | 475 |
1 files changed, 475 insertions, 0 deletions
diff --git a/src/fftw/planner.c b/src/fftw/planner.c new file mode 100644 index 0000000..30217d2 --- /dev/null +++ b/src/fftw/planner.c @@ -0,0 +1,475 @@ +/* + * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* + * planner.c -- find the optimal plan + */ + +/* $Id: planner.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ */ +#include "fftw-int.h" +#include <stdlib.h> +#include <stdio.h> + +extern fftw_generic_codelet fftw_twiddle_generic; +extern fftw_generic_codelet fftwi_twiddle_generic; +extern fftw_codelet_desc *fftw_config[]; + +fftw_plan_hook_ptr fftw_plan_hook = (fftw_plan_hook_ptr) NULL; + +static void init_test_array(fftw_complex *arr, int stride, int n) +{ + int j; + + for (j = 0; j < n; ++j) { + c_re(arr[stride * j]) = 0.0; + c_im(arr[stride * j]) = 0.0; + } +} + +/* + * The timer keeps doubling the number of iterations + * until the program runs for more than FFTW_TIME_MIN + */ +static double fftw_measure_runtime(fftw_plan plan, + fftw_complex *in, int istride, + fftw_complex *out, int ostride) +{ + fftw_time begin, end, start; + double t, tmax, tmin; + int i, iter; + int n; + int repeat; + int howmany = plan->vector_size; + + n = plan->n; + + iter = 1; + + for (;;) { + tmin = 1.0E10; + tmax = -1.0E10; + init_test_array(in, istride, n * howmany); + + start = fftw_get_time(); + /* repeat the measurement FFTW_TIME_REPEAT times */ + for (repeat = 0; repeat < FFTW_TIME_REPEAT; ++repeat) { + begin = fftw_get_time(); + for (i = 0; i < iter; ++i) { + fftw(plan, howmany, in, istride, istride, + out, ostride, ostride); + } + end = fftw_get_time(); + + t = fftw_time_to_sec(fftw_time_diff(end, begin)); + if (t < tmin) + tmin = t; + if (t > tmax) + tmax = t; + + /* do not run for too long */ + t = fftw_time_to_sec(fftw_time_diff(end, start)); + if (t > FFTW_TIME_LIMIT) + break; + } + + if (tmin >= FFTW_TIME_MIN) + break; + + iter *= 2; + } + + tmin /= (double) iter; + tmax /= (double) iter; + + return tmin; +} + +/* auxiliary functions */ +static void compute_cost(fftw_plan plan, + fftw_complex *in, int istride, + fftw_complex *out, int ostride) +{ + if (plan->flags & FFTW_MEASURE) + plan->cost = fftw_measure_runtime(plan, in, istride, out, ostride); + else { + double c; + c = plan->n * fftw_estimate_node(plan->root) * plan->vector_size; + plan->cost = c; + } +} + +static void run_plan_hooks(fftw_plan p) +{ + if (fftw_plan_hook && p) { + fftw_complete_twiddle(p->root, p->n); + fftw_plan_hook(p); + } +} + + +/* macrology */ +#define FOR_ALL_CODELETS(p) \ + fftw_codelet_desc **__q, *p; \ + for (__q = &fftw_config[0]; (p = (*__q)); ++__q) + +/****************************************** + * Recursive planner * + ******************************************/ +static fftw_plan planner(fftw_plan *table, int n, fftw_direction dir, + int flags, int vector_size, + fftw_complex *, int, fftw_complex *, int); + +/* + * the planner consists of two parts: one that tries to + * use accumulated wisdom, and one that does not. + * A small driver invokes both parts in sequence + */ + +/* planner with wisdom: look up the codelet suggested by the wisdom */ +static fftw_plan planner_wisdom(fftw_plan *table, int n, + fftw_direction dir, int flags, + int vector_size, + fftw_complex *in, int istride, + fftw_complex *out, int ostride) +{ + fftw_plan best = (fftw_plan) 0; + fftw_plan_node *node; + int have_wisdom; + enum fftw_node_type wisdom_type; + int wisdom_signature; + fftw_recurse_kind wisdom_recurse_kind; + + /* see if we remember any wisdom for this case */ + have_wisdom = fftw_wisdom_lookup(n, flags, dir, FFTW_WISDOM, + istride, ostride, + &wisdom_type, &wisdom_signature, + &wisdom_recurse_kind, 0); + + if (!have_wisdom) + return best; + + if (wisdom_type == FFTW_NOTW) { + FOR_ALL_CODELETS(p) { + if (p->dir == dir && p->type == wisdom_type) { + /* see if wisdom applies */ + if (wisdom_signature == p->signature && + p->size == n) { + node = fftw_make_node_notw(n, p); + best = fftw_make_plan(n, dir, node, flags, + p->type, p->signature, + FFTW_NORMAL_RECURSE, + vector_size); + fftw_use_plan(best); + run_plan_hooks(best); + return best; + } + } + } + } + if (wisdom_type == FFTW_TWIDDLE) { + FOR_ALL_CODELETS(p) { + if (p->dir == dir && p->type == wisdom_type) { + + /* see if wisdom applies */ + if (wisdom_signature == p->signature && + p->size > 1 && + (n % p->size) == 0) { + fftw_plan r = planner(table, n / p->size, dir, + flags | FFTW_NO_VECTOR_RECURSE, + wisdom_recurse_kind == + FFTW_VECTOR_RECURSE ? + p->size : vector_size, + in, istride, out, ostride); + node = fftw_make_node_twiddle(n, p, + r->root, flags); + best = fftw_make_plan(n, dir, node, flags, + p->type, p->signature, + wisdom_recurse_kind, + vector_size); + fftw_use_plan(best); + run_plan_hooks(best); + fftw_destroy_plan_internal(r); + return best; + } + } + } + } + /* + * BUG (or: TODO) Can we have generic wisdom? This is probably + * an academic question + */ + + return best; +} + +/* + * planner with no wisdom: try all combinations and pick + * the best + */ +static fftw_plan planner_normal(fftw_plan *table, int n, fftw_direction dir, + int flags, int vector_size, + fftw_complex *in, int istride, + fftw_complex *out, int ostride) +{ + fftw_plan best = (fftw_plan) 0; + fftw_plan newplan; + fftw_plan_node *node; + + /* see if we have any codelet that solves the problem */ + { + FOR_ALL_CODELETS(p) { + if (p->dir == dir && p->type == FFTW_NOTW) { + if (p->size == n) { + node = fftw_make_node_notw(n, p); + newplan = fftw_make_plan(n, dir, node, flags, + p->type, p->signature, + FFTW_NORMAL_RECURSE, + vector_size); + fftw_use_plan(newplan); + compute_cost(newplan, in, istride, out, ostride); + run_plan_hooks(newplan); + best = fftw_pick_better(newplan, best); + } + } + } + } + + /* Then, try all available twiddle codelets */ + { + FOR_ALL_CODELETS(p) { + if (p->dir == dir && p->type == FFTW_TWIDDLE) { + if ((n % p->size) == 0 && + p->size > 1 && + (!best || n != p->size)) { + fftw_plan r = planner(table, n / p->size, dir, + flags | FFTW_NO_VECTOR_RECURSE, + vector_size, + in, istride, out, ostride); + node = fftw_make_node_twiddle(n, p, + r->root, flags); + newplan = fftw_make_plan(n, dir, node, flags, + p->type, p->signature, + FFTW_NORMAL_RECURSE, + vector_size); + fftw_use_plan(newplan); + fftw_destroy_plan_internal(r); + compute_cost(newplan, in, istride, out, ostride); + run_plan_hooks(newplan); + best = fftw_pick_better(newplan, best); + } + } + } + } + + /* try vector recursion unless prohibited by the flags: */ + if (! (flags & FFTW_NO_VECTOR_RECURSE)) { + FOR_ALL_CODELETS(p) { + if (p->dir == dir && p->type == FFTW_TWIDDLE) { + if ((n % p->size) == 0 && + p->size > 1 && + (!best || n != p->size)) { + fftw_plan r = planner(table, n / p->size, dir, + flags | FFTW_NO_VECTOR_RECURSE, + p->size, + in, istride, out, ostride); + node = fftw_make_node_twiddle(n, p, + r->root, flags); + newplan = fftw_make_plan(n, dir, node, flags, + p->type, p->signature, + FFTW_VECTOR_RECURSE, + vector_size); + fftw_use_plan(newplan); + fftw_destroy_plan_internal(r); + compute_cost(newplan, in, istride, out, ostride); + run_plan_hooks(newplan); + best = fftw_pick_better(newplan, best); + } + } + } + } + + /* + * resort to generic or rader codelets for unknown factors + */ + { + fftw_generic_codelet *codelet = (dir == FFTW_FORWARD ? + fftw_twiddle_generic : + fftwi_twiddle_generic); + int size, prev_size = 0, remaining_factors = n; + fftw_plan r; + + while (remaining_factors > 1) { + size = fftw_factor(remaining_factors); + remaining_factors /= size; + + /* don't try the same factor more than once */ + if (size == prev_size) + continue; + prev_size = size; + + /* Look for codelets corresponding to this factor. */ + { + FOR_ALL_CODELETS(p) { + if (p->dir == dir && p->type == FFTW_TWIDDLE + && p->size == size) { + size = 0; + break; + } + } + } + + /* + * only try a generic/rader codelet if there were no + * twiddle codelets for this factor + */ + if (!size) + continue; + + r = planner(table, n / size, dir, + flags | FFTW_NO_VECTOR_RECURSE, + vector_size, + in, istride, out, ostride); + + /* Try Rader codelet: */ + node = fftw_make_node_rader(n, size, dir, r->root, flags); + newplan = fftw_make_plan(n, dir, node, flags, FFTW_RADER, 0, + FFTW_NORMAL_RECURSE, vector_size); + fftw_use_plan(newplan); + compute_cost(newplan, in, istride, out, ostride); + run_plan_hooks(newplan); + best = fftw_pick_better(newplan, best); + + if (size < 100) { /* + * only try generic for small + * sizes + */ + /* Try generic codelet: */ + node = fftw_make_node_generic(n, size, codelet, + r->root, flags); + newplan = fftw_make_plan(n, dir, node, flags, + FFTW_GENERIC, 0, + FFTW_NORMAL_RECURSE, vector_size); + fftw_use_plan(newplan); + compute_cost(newplan, in, istride, out, ostride); + run_plan_hooks(newplan); + best = fftw_pick_better(newplan, best); + } + fftw_destroy_plan_internal(r); + } + } + + if (!best) + fftw_die("bug in planner\n"); + + return best; +} + +static fftw_plan planner(fftw_plan *table, int n, fftw_direction dir, + int flags, int vector_size, + fftw_complex *in, int istride, + fftw_complex *out, int ostride) +{ + fftw_plan best = (fftw_plan) 0; + + if (vector_size > 1) + flags |= FFTW_NO_VECTOR_RECURSE; + + /* see if plan has already been computed */ + best = fftw_lookup(table, n, flags, vector_size); + if (best) { + fftw_use_plan(best); + return best; + } + /* try a wise plan */ + best = planner_wisdom(table, n, dir, flags, vector_size, + in, istride, out, ostride); + + if (!best) { + /* No wisdom. Plan normally. */ + best = planner_normal(table, n, dir, flags, + vector_size, + in, istride, out, ostride); + } + if (best) { + fftw_insert(table, best); + + /* remember the wisdom */ + fftw_wisdom_add(n, flags, dir, FFTW_WISDOM, istride, ostride, + best->wisdom_type, + best->wisdom_signature, + best->recurse_kind); + } + return best; +} + +fftw_plan fftw_create_plan_specific(int n, fftw_direction dir, int flags, + fftw_complex *in, int istride, + fftw_complex *out, int ostride) +{ + fftw_plan table; + fftw_plan p1; + + /* validate parameters */ + if (n <= 0) + return (fftw_plan) 0; + +#ifndef FFTW_ENABLE_VECTOR_RECURSE + /* TEMPORARY: disable vector recursion until it is more tested. */ + flags |= FFTW_NO_VECTOR_RECURSE; +#endif + + if ((dir != FFTW_FORWARD) && (dir != FFTW_BACKWARD)) + return (fftw_plan) 0; + + fftw_make_empty_table(&table); + p1 = planner(&table, n, dir, flags, 1, + in, istride, out, ostride); + fftw_destroy_table(&table); + + if (p1) + fftw_complete_twiddle(p1->root, n); + return p1; +} + +fftw_plan fftw_create_plan(int n, fftw_direction dir, int flags) +{ + fftw_complex *tmp_in, *tmp_out; + fftw_plan p; + + if (flags & FFTW_MEASURE) { + tmp_in = (fftw_complex *) fftw_malloc(2 * n * sizeof(fftw_complex)); + if (!tmp_in) + return 0; + tmp_out = tmp_in + n; + + p = fftw_create_plan_specific(n, dir, flags, + tmp_in, 1, tmp_out, 1); + + fftw_free(tmp_in); + } else + p = fftw_create_plan_specific(n, dir, flags, + (fftw_complex *) 0, 1, (fftw_complex *) 0, 1); + + return p; +} + +void fftw_destroy_plan(fftw_plan plan) +{ + fftw_destroy_plan_internal(plan); +} |