summaryrefslogtreecommitdiff
path: root/src/fftw3/kernel/planner.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/fftw3/kernel/planner.c')
-rw-r--r--src/fftw3/kernel/planner.c695
1 files changed, 695 insertions, 0 deletions
diff --git a/src/fftw3/kernel/planner.c b/src/fftw3/kernel/planner.c
new file mode 100644
index 0000000..1d01cb9
--- /dev/null
+++ b/src/fftw3/kernel/planner.c
@@ -0,0 +1,695 @@
+/*
+ * Copyright (c) 2000 Matteo Frigo
+ * Copyright (c) 2000 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+/* $Id: planner.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+#include "ifftw.h"
+#include <string.h>
+
+/* GNU Coding Standards, Sec. 5.2: "Please write the comments in a GNU
+ program in English, because English is the one language that nearly
+ all programmers in all countries can read."
+
+ ingemisco tanquam reus
+ culpa rubet vultus meus
+ supplicanti parce [rms]
+*/
+
+#define BLESSEDP(solution) ((solution)->flags & BLESSING)
+#define VALIDP(solution) ((solution)->flags & H_VALID)
+#define UNBLESS(flags) flags &= ~BLESSING
+
+#define MAXNAM 64 /* maximum length of registrar's name.
+ Used for reading wisdom. There is no point
+ in doing this right */
+
+/* Flags f1 subsumes flags f2 iff f1 is less/equally impatient than
+ f2, defining a partial ordering. */
+#define IMPATIENCE(flags) ((flags) & IMPATIENCE_FLAGS)
+#define NONIMPATIENCE(flags) ((flags) & NONIMPATIENCE_FLAGS)
+#define ORDERED(f1, f2) (SUBSUMES(f1, f2) || SUBSUMES(f2, f1))
+#define SUBSUMES(f1, f2) ((IMPATIENCE(f1) & (f2)) == IMPATIENCE(f1))
+
+static unsigned addmod(unsigned a, unsigned b, unsigned p)
+{
+ /* gcc-2.95/sparc produces incorrect code for the fast version below. */
+#if defined(__sparc__) && defined(__GNUC__)
+ /* slow version */
+ return (a + b) % p;
+#else
+ /* faster version */
+ unsigned c = a + b;
+ return c >= p ? c - p : c;
+#endif
+}
+
+/*
+ slvdesc management:
+*/
+static void sgrow(planner *ego)
+{
+ unsigned osiz = ego->slvdescsiz, nsiz = 1 + osiz + osiz / 4;
+ slvdesc *ntab = (slvdesc *)MALLOC(nsiz * sizeof(slvdesc), SLVDESCS);
+ slvdesc *otab = ego->slvdescs;
+ unsigned i;
+
+ ego->slvdescs = ntab;
+ ego->slvdescsiz = nsiz;
+ for (i = 0; i < osiz; ++i)
+ ntab[i] = otab[i];
+ X(ifree0)(otab);
+}
+
+static void register_solver(planner *ego, solver *s)
+{
+ slvdesc *n;
+ if (s) { /* add s to solver list */
+ X(solver_use)(s);
+
+ if (ego->nslvdesc >= ego->slvdescsiz)
+ sgrow(ego);
+
+ n = ego->slvdescs + ego->nslvdesc++;
+
+ n->slv = s;
+ n->reg_nam = ego->cur_reg_nam;
+ n->reg_id = ego->cur_reg_id++;
+
+ A(strlen(n->reg_nam) < MAXNAM);
+ n->nam_hash = X(hash)(n->reg_nam);
+ }
+}
+
+static int slookup(planner *ego, char *nam, int id)
+{
+ unsigned h = X(hash)(nam); /* used to avoid strcmp in the common case */
+ FORALL_SOLVERS(ego, s, sp, {
+ UNUSED(s);
+ if (sp->reg_id == id && sp->nam_hash == h
+ && !strcmp(sp->reg_nam, nam))
+ return sp - ego->slvdescs;
+ });
+ return -1;
+}
+
+/*
+ md5-related stuff:
+*/
+
+/* first hash function */
+static unsigned h1(planner *ego, const md5sig s)
+{
+ return s[0] % ego->hashsiz;
+}
+
+/* second hash function (for double hashing) */
+static unsigned h2(planner *ego, const md5sig s)
+{
+ return 1U + s[1] % (ego->hashsiz - 1);
+}
+
+static void md5hash(md5 *m, const problem *p, const planner *plnr)
+{
+ X(md5begin)(m);
+ X(md5unsigned)(m, sizeof(R)); /* so we don't mix different precisions */
+ X(md5unsigned)(m, plnr->problem_flags);
+ X(md5int)(m, plnr->nthr);
+ p->adt->hash(p, m);
+ X(md5end)(m);
+}
+
+static int md5eq(const md5sig a, const md5sig b)
+{
+ return a[0] == b[0] && a[1] == b[1] && a[2] == b[2] && a[3] == b[3];
+}
+
+static void sigcpy(const md5sig a, md5sig b)
+{
+ b[0] = a[0]; b[1] = a[1]; b[2] = a[2]; b[3] = a[3];
+}
+
+/*
+ memoization routines :
+*/
+
+/*
+ liber scriptus proferetur
+ in quo totum continetur
+ unde mundus iudicetur
+*/
+struct solution_s {
+ md5sig s;
+ unsigned short flags;
+ short slvndx;
+};
+
+static solution *hlookup(planner *ego, const md5sig s, unsigned short flags)
+{
+ unsigned g, h = h1(ego, s), d = h2(ego, s);
+
+ ++ego->lookup;
+
+ for (g = h; ; g = addmod(g, d, ego->hashsiz)) {
+ solution *l = ego->solutions + g;
+ ++ego->lookup_iter;
+ if (VALIDP(l)) {
+ if (md5eq(s, l->s) && ORDERED(l->flags, flags)) {
+ ++ego->succ_lookup;
+ return l;
+ }
+ } else {
+ return 0;
+ }
+ A((g + d) % ego->hashsiz != h);
+ }
+}
+
+
+static void hinsert0(planner *ego, const md5sig s, unsigned short flags,
+ int slvndx, solution *l)
+{
+ ++ego->insert;
+ if (!l) {
+ /* search for nonfull slot */
+ unsigned g, h = h1(ego, s), d = h2(ego, s);
+ ++ego->insert_unknown;
+ for (g = h; ; g = addmod(g, d, ego->hashsiz)) {
+ ++ego->insert_iter;
+ l = ego->solutions + g;
+ if (!VALIDP(l)) break;
+ A((g + d) % ego->hashsiz != h);
+ }
+ }
+
+ /* fill slot */
+ l->flags = flags | H_VALID;
+ l->slvndx = (short)slvndx;
+ sigcpy(s, l->s);
+}
+
+static void rehash(planner *ego, unsigned nsiz)
+{
+ unsigned osiz = ego->hashsiz, h;
+ solution *osol = ego->solutions, *nsol;
+
+ nsiz = (unsigned)X(next_prime)((int)nsiz);
+ nsol = (solution *)MALLOC(nsiz * sizeof(solution), HASHT);
+ ++ego->nrehash;
+
+ /* init new table */
+ for (h = 0; h < nsiz; ++h)
+ nsol[h].flags = 0;
+
+ /* install new table */
+ ego->hashsiz = nsiz;
+ ego->solutions = nsol;
+
+ /* copy table */
+ for (h = 0; h < osiz; ++h) {
+ solution *l = osol + h;
+ if (VALIDP(l))
+ hinsert0(ego, l->s, l->flags, l->slvndx, 0);
+ }
+
+ X(ifree0)(osol);
+}
+
+static unsigned minsz(unsigned nelem)
+{
+ return 1U + nelem + nelem / 8U;
+}
+
+static unsigned nextsz(unsigned nelem)
+{
+ return minsz(minsz(nelem));
+}
+
+static void hgrow(planner *ego)
+{
+ unsigned nelem = ego->nelem;
+ if (minsz(nelem) >= ego->hashsiz)
+ rehash(ego, nextsz(nelem));
+}
+
+static void hshrink(planner *ego)
+{
+ unsigned nelem = ego->nelem;
+ /* always rehash after deletions */
+ rehash(ego, nextsz(nelem));
+}
+
+/* inherit blessing, but only if the solver is the same */
+static unsigned short merge_flags(unsigned short dstflags, int dstndx,
+ unsigned short srcflags, int srcndx)
+{
+ if (srcndx == dstndx)
+ dstflags |= (srcflags & BLESSING); /* ne me perdas illa die */
+ return dstflags;
+}
+
+static void hinsert(planner *ego, const md5sig s,
+ unsigned short flags, int slvndx)
+{
+ solution *l;
+
+ if ((l = hlookup(ego, s, flags))) {
+ if (SUBSUMES(flags, l->flags)) {
+ /* overwrite old solution */
+ flags = merge_flags(flags, slvndx, l->flags, l->slvndx);
+ } else {
+ A(SUBSUMES(l->flags, flags));
+ l->flags = merge_flags(l->flags, l->slvndx, flags, slvndx);
+ return;
+ }
+ } else {
+ ++ego->nelem;
+ hgrow(ego);
+ }
+ hinsert0(ego, s, flags, slvndx, l);
+}
+
+static void hcurse_subsumed(planner *ego)
+{
+ unsigned h;
+
+ /* unbless any entries that are unreachable because they
+ are subsumed by less-impatient ones. */
+ for (h = 0; h < ego->hashsiz; ++h) {
+ solution *l = ego->solutions + h;
+ if (VALIDP(l)) {
+ unsigned d = h2(ego, l->s), g = addmod(h, d, ego->hashsiz);
+ for (; ; g = addmod(g, d, ego->hashsiz)) {
+ solution *m = ego->solutions + g;
+ if (VALIDP(m)) {
+ if (md5eq(l->s, m->s) &&
+ SUBSUMES(l->flags, m->flags)) {
+ /* ne cadant in obscurum */
+ l->flags = merge_flags(l->flags, l->slvndx,
+ m->flags, m->slvndx);
+
+ /* cum vix justus sit securus */
+ UNBLESS(m->flags);
+ }
+ }
+ else break;
+ A((g + d) % ego->hashsiz != h);
+ }
+ }
+ }
+}
+
+
+static void invoke_hook(planner *ego, plan *pln, const problem *p,
+ int optimalp)
+{
+ if (ego->hook)
+ ego->hook(pln, p, optimalp);
+}
+
+static void evaluate_plan(planner *ego, plan *pln, const problem *p)
+{
+ if (!BELIEVE_PCOSTP(ego) || pln->pcost == 0.0) {
+ ego->nplan++;
+
+ if (ESTIMATEP(ego)) {
+ estimate:
+ /* heuristic */
+ pln->pcost = 0.0
+ + pln->ops.add
+ + pln->ops.mul
+ + 2 * pln->ops.fma
+ + pln->ops.other;
+ ego->epcost += pln->pcost;
+ } else {
+ double t = X(measure_execution_time)(pln, p);
+
+ if (t < 0) { /* unavailable cycle counter */
+ /* Real programmers can write FORTRAN in any language */
+ goto estimate;
+ }
+
+ pln->pcost = t;
+ ego->pcost += t;
+ }
+ }
+
+ invoke_hook(ego, pln, p, 0);
+}
+
+/* maintain dynamic scoping of flags, nthr: */
+static plan *invoke_solver(planner *ego, problem *p, solver *s,
+ unsigned short nflags)
+{
+ unsigned short planner_flags = ego->planner_flags;
+ unsigned problem_flags = ego->problem_flags;
+ int nthr = ego->nthr;
+ plan *pln;
+ ego->planner_flags = nflags;
+ pln = s->adt->mkplan(s, p, ego);
+ ego->problem_flags = problem_flags;
+ ego->nthr = nthr;
+ ego->planner_flags = planner_flags;
+ return pln;
+}
+
+static plan *search(planner *ego, problem *p, slvdesc **descp)
+{
+ plan *best = 0;
+ int best_not_yet_timed = 1;
+ int pass;
+
+ if (NO_SEARCHP(ego)) {
+ /* D("invalid search for %P %x\n", p, ego->planner_flags); */
+ return 0;
+ }
+
+ for (pass = 0; pass < 2; ++pass) {
+ unsigned short nflags = ego->planner_flags;
+
+ if (best) break;
+
+ switch (pass) {
+ case 0:
+ /* skip pass 0 during exhaustive search */
+ if (!NO_EXHAUSTIVEP(ego)) continue;
+ nflags |= NO_UGLY;
+ break;
+ case 1:
+ /* skip pass 1 if NO_UGLY */
+ if (NO_UGLYP(ego)) continue;
+ break;
+ }
+
+ FORALL_SOLVERS(ego, s, sp, {
+ plan *pln = invoke_solver(ego, p, s, nflags);
+
+ if (pln) {
+ if (best) {
+ if (best_not_yet_timed) {
+ evaluate_plan(ego, best, p);
+ best_not_yet_timed = 0;
+ }
+ evaluate_plan(ego, pln, p);
+ if (pln->pcost < best->pcost) {
+ X(plan_destroy_internal)(best);
+ best = pln;
+ *descp = sp;
+ } else {
+ X(plan_destroy_internal)(pln);
+ }
+ } else {
+ best = pln;
+ *descp = sp;
+ }
+ }
+ });
+ }
+
+ return best;
+}
+
+static plan *mkplan(planner *ego, problem *p)
+{
+ plan *pln;
+ md5 m;
+ slvdesc *sp;
+ unsigned short flags;
+ ASSERT_ALIGNED_DOUBLE;
+
+ /* Canonical form. */
+ if (!NO_EXHAUSTIVEP(ego)) ego->planner_flags &= ~NO_UGLY;
+
+ ++ego->nprob;
+ md5hash(&m, p, ego);
+
+ pln = 0;
+
+ {
+ solution *sol; /* new scope for sol */
+
+ if ((sol = hlookup(ego, m.s, ego->planner_flags))) {
+ if (SUBSUMES(sol->flags, ego->planner_flags)) {
+ /* wisdom is acceptable */
+ if (sol->slvndx < 0)
+ return 0; /* known to be infeasible */
+
+ /* use solver to obtain a plan */
+ sp = ego->slvdescs + sol->slvndx;
+ pln =
+ invoke_solver(ego, p, sp->slv,
+ (0
+ | NO_SEARCH
+ | IMPATIENCE(sol->flags)
+ | NONIMPATIENCE(ego->planner_flags) ));
+
+ /* if (!pln) then the entry is bogus, but
+ we currently do nothing about it. */
+ /* CAVEAS: Do not use ``sol'' here, because the
+ pointer is possibly dangling after the call to
+ invoke_solver(). */
+ } else {
+ A(SUBSUMES(ego->planner_flags, sol->flags));
+ }
+ }
+ }
+
+
+ if (!pln)
+ pln = search(ego, p, &sp);
+
+ flags = ego->planner_flags;
+
+ if (pln) {
+ /* Postulate de iure that NO_UGLY subsumes ~NO_UGLY if the
+ problem is feasible. Also postulate that NO_SEARCH
+ subsumes ~NO_SEARCH. */
+ flags &= ~(NO_UGLY | NO_SEARCH);
+ }
+
+ hinsert(ego, m.s, flags, pln ? sp - ego->slvdescs : -1);
+
+ if (pln)
+ invoke_hook(ego, pln, p, 1);
+ return pln;
+}
+
+/* destroy hash table entries. If FORGET_EVERYTHING, destroy the whole
+ table. If FORGET_ACCURSED, then destroy entries that are not blessed. */
+static void forget(planner *ego, amnesia a)
+{
+ unsigned h;
+
+ /* garbage-collect while we are at it */
+ if (a != FORGET_EVERYTHING)
+ hcurse_subsumed(ego);
+
+ for (h = 0; h < ego->hashsiz; ++h) {
+ solution *l = ego->solutions + h;
+ if (VALIDP(l)) {
+ if (a == FORGET_EVERYTHING ||
+ (a == FORGET_ACCURSED && !BLESSEDP(l))) {
+ /* confutatis maledictis
+ flammis acribus addictis */
+ l->flags &= ~H_VALID;
+ --ego->nelem;
+ }
+ }
+ }
+ /* nil inultum remanebit */
+
+ hshrink(ego);
+}
+
+static void htab_destroy(planner *ego)
+{
+ forget(ego, FORGET_EVERYTHING);
+ X(ifree)(ego->solutions);
+ ego->nelem = 0U;
+}
+
+/* FIXME: what sort of version information should we write? */
+#define WISDOM_PREAMBLE PACKAGE "-" VERSION " " STRINGIZE(X(wisdom))
+
+/* tantus labor non sit cassus */
+static void exprt(planner *ego, printer *p)
+{
+ unsigned h;
+
+ hcurse_subsumed(ego);
+
+ p->print(p, "(" WISDOM_PREAMBLE "%(");
+ for (h = 0; h < ego->hashsiz; ++h) {
+ solution *l = ego->solutions + h;
+ if (VALIDP(l) && BLESSEDP(l) && l->slvndx >= 0) {
+ slvdesc *sp = ego->slvdescs + l->slvndx;
+ /* qui salvandos salvas gratis
+ salva me fons pietatis */
+ p->print(p, "(%s %d #x%x #x%M #x%M #x%M #x%M)\n",
+ sp->reg_nam, sp->reg_id, (int)l->flags,
+ l->s[0], l->s[1], l->s[2], l->s[3]);
+ }
+ }
+ p->print(p, "%))\n");
+}
+
+/* mors stupebit et natura
+ cum resurget creatura */
+static int imprt(planner *ego, scanner *sc)
+{
+ char buf[MAXNAM + 1];
+ md5uint sig[4];
+ int flags;
+ int reg_id;
+ int slvndx;
+ solution *sol;
+
+ if (!sc->scan(sc, "(" WISDOM_PREAMBLE))
+ return 0; /* don't need to restore hashtable */
+
+ /* make a backup copy of the hash table (cache the hash) */
+ {
+ unsigned h, hsiz = ego->hashsiz;
+ sol = (solution *)MALLOC(hsiz * sizeof(solution), HASHT);
+ for (h = 0; h < hsiz; ++h)
+ sol[h] = ego->solutions[h];
+ }
+
+ while (1) {
+ if (sc->scan(sc, ")"))
+ break;
+
+ /* qua resurget ex favilla */
+ if (!sc->scan(sc, "(%*s %d #x%x #x%M #x%M #x%M #x%M)",
+ MAXNAM, buf, &reg_id, &flags,
+ sig + 0, sig + 1, sig + 2, sig + 3))
+ goto bad;
+
+ if ((slvndx = slookup(ego, buf, reg_id)) < 0)
+ goto bad;
+
+ /* inter oves locum praesta */
+ hinsert(ego, sig, (unsigned short)flags, slvndx);
+ }
+
+ X(ifree0)(sol);
+ return 1;
+
+ bad:
+ /* ``The wisdom of FFTW must be above suspicion.'' */
+ X(ifree0)(ego->solutions);
+ ego->solutions = sol;
+ return 0;
+}
+
+/*
+ * create a planner
+ */
+planner *X(mkplanner)(void)
+{
+ static const planner_adt padt = {
+ register_solver, mkplan, forget, exprt, imprt
+ };
+
+ planner *p = (planner *) MALLOC(sizeof(planner), PLANNERS);
+
+ p->adt = &padt;
+ p->nplan = p->nprob = p->nrehash = 0;
+ p->pcost = p->epcost = 0.0;
+ p->succ_lookup = p->lookup = p->lookup_iter = 0;
+ p->insert = p->insert_iter = p->insert_unknown = 0;
+ p->hook = 0;
+ p->cur_reg_nam = 0;
+
+ p->slvdescs = 0;
+ p->nslvdesc = p->slvdescsiz = 0;
+
+ p->solutions = 0;
+ p->hashsiz = p->nelem = 0U;
+
+ p->problem_flags = 0;
+ p->planner_flags = 0;
+ p->nthr = 1;
+
+ hgrow(p); /* so that hashsiz > 0 */
+
+ return p;
+}
+
+void X(planner_destroy)(planner *ego)
+{
+ /* destroy hash table */
+ htab_destroy(ego);
+
+ /* destroy solvdesc table */
+ FORALL_SOLVERS(ego, s, sp, {
+ UNUSED(sp);
+ X(solver_destroy)(s);
+ });
+
+ X(ifree0)(ego->slvdescs);
+ X(ifree)(ego); /* dona eis requiem */
+}
+
+plan *X(mkplan_d)(planner *ego, problem *p)
+{
+ plan *pln = ego->adt->mkplan(ego, p);
+ X(problem_destroy)(p);
+ return pln;
+}
+
+/*
+ * Debugging code:
+ */
+#ifdef FFTW_DEBUG
+
+void X(planner_dump)(planner *ego, int verbose)
+{
+ unsigned valid = 0, empty = 0, infeasible = 0;
+ unsigned h;
+ UNUSED(verbose); /* historical */
+
+ for (h = 0; h < ego->hashsiz; ++h) {
+ solution *l = ego->solutions + h;
+ if (VALIDP(l)) {
+ ++valid;
+ if (l->slvndx < 0) ++infeasible;
+ } else
+ ++empty;
+
+ }
+
+ D("nplan = %d\n", ego->nplan);
+ D("nprob = %d\n", ego->nprob);
+ D("pcost = %g\n", ego->pcost);
+ D("epcost = %g\n", ego->epcost);
+ D("lookup = %d\n", ego->lookup);
+ D("succ_lookup = %d\n", ego->succ_lookup);
+ D("lookup_iter = %d\n", ego->lookup_iter);
+ D("insert = %d\n", ego->insert);
+ D("insert_iter = %d\n", ego->insert_iter);
+ D("insert_unknown = %d\n", ego->insert_unknown);
+ D("nrehash = %d\n", ego->nrehash);
+ D("hashsiz = %u\n", ego->hashsiz);
+ D("empty = %d\n", empty);
+ D("valid = %d\n", valid);
+ D("infeasible = %d\n", infeasible);
+ A(ego->nelem == valid);
+}
+
+#endif