/*
 * Copyright (c) 2000 Matteo Frigo
 * Copyright (c) 2000 Massachusetts Institute of Technology
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 */

/* $Id: planner.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
#include "ifftw.h"
#include <string.h>

/* GNU Coding Standards, Sec. 5.2: "Please write the comments in a GNU
   program in English, because English is the one language that nearly
   all programmers in all countries can read."

                    ingemisco tanquam reus
		    culpa rubet vultus meus
		    supplicanti parce [rms]
*/

#define BLESSEDP(solution) ((solution)->flags & BLESSING)
#define VALIDP(solution) ((solution)->flags & H_VALID)
#define UNBLESS(flags)  flags &= ~BLESSING

#define MAXNAM 64  /* maximum length of registrar's name.
		      Used for reading wisdom.  There is no point
		      in doing this right */

/* Flags f1 subsumes flags f2 iff f1 is less/equally impatient than
   f2, defining a partial ordering. */
#define IMPATIENCE(flags) ((flags) & IMPATIENCE_FLAGS)
#define NONIMPATIENCE(flags) ((flags) & NONIMPATIENCE_FLAGS)
#define ORDERED(f1, f2) (SUBSUMES(f1, f2) || SUBSUMES(f2, f1))
#define SUBSUMES(f1, f2) ((IMPATIENCE(f1) & (f2)) == IMPATIENCE(f1))

static unsigned addmod(unsigned a, unsigned b, unsigned p)
{
     /* gcc-2.95/sparc produces incorrect code for the fast version below. */
#if defined(__sparc__) && defined(__GNUC__)
     /* slow version  */
     return (a + b) % p;
#else
     /* faster version */
     unsigned c = a + b;
     return c >= p ? c - p : c;
#endif
}

/*
  slvdesc management:
*/
static void sgrow(planner *ego)
{
     unsigned osiz = ego->slvdescsiz, nsiz = 1 + osiz + osiz / 4;
     slvdesc *ntab = (slvdesc *)MALLOC(nsiz * sizeof(slvdesc), SLVDESCS);
     slvdesc *otab = ego->slvdescs;
     unsigned i;

     ego->slvdescs = ntab;
     ego->slvdescsiz = nsiz;
     for (i = 0; i < osiz; ++i)
	  ntab[i] = otab[i];
     X(ifree0)(otab);
}

static void register_solver(planner *ego, solver *s)
{
     slvdesc *n;
     if (s) { /* add s to solver list */
	  X(solver_use)(s);

	  if (ego->nslvdesc >= ego->slvdescsiz)
	       sgrow(ego);

	  n = ego->slvdescs + ego->nslvdesc++;

	  n->slv = s;
	  n->reg_nam = ego->cur_reg_nam;
	  n->reg_id = ego->cur_reg_id++;
	  
	  A(strlen(n->reg_nam) < MAXNAM);
	  n->nam_hash = X(hash)(n->reg_nam);
     }
}

static int slookup(planner *ego, char *nam, int id)
{
     unsigned h = X(hash)(nam); /* used to avoid strcmp in the common case */
     FORALL_SOLVERS(ego, s, sp, {
	  UNUSED(s);
	  if (sp->reg_id == id && sp->nam_hash == h
	      && !strcmp(sp->reg_nam, nam))
	       return sp - ego->slvdescs;
     });
     return -1;
}

/*
  md5-related stuff:
*/

/* first hash function */
static unsigned h1(planner *ego, const md5sig s)
{
     return s[0] % ego->hashsiz;
}

/* second hash function (for double hashing) */
static unsigned h2(planner *ego, const md5sig s)
{
     return 1U + s[1] % (ego->hashsiz - 1);
}

static void md5hash(md5 *m, const problem *p, const planner *plnr)
{
     X(md5begin)(m);
     X(md5unsigned)(m, sizeof(R)); /* so we don't mix different precisions */
     X(md5unsigned)(m, plnr->problem_flags);
     X(md5int)(m, plnr->nthr);
     p->adt->hash(p, m);
     X(md5end)(m);
}

static int md5eq(const md5sig a, const md5sig b)
{
     return a[0] == b[0] && a[1] == b[1] && a[2] == b[2] && a[3] == b[3];
}

static void sigcpy(const md5sig a, md5sig b)
{
     b[0] = a[0]; b[1] = a[1]; b[2] = a[2]; b[3] = a[3];
}

/*
  memoization routines :
*/

/*
   liber scriptus proferetur
   in quo totum continetur
   unde mundus iudicetur
*/
struct solution_s {
     md5sig s;
     unsigned short flags;
     short slvndx;
};

static solution *hlookup(planner *ego, const md5sig s, unsigned short flags)
{
     unsigned g, h = h1(ego, s), d = h2(ego, s);

     ++ego->lookup;

     for (g = h; ; g = addmod(g, d, ego->hashsiz)) {
	  solution *l = ego->solutions + g;
	  ++ego->lookup_iter;
	  if (VALIDP(l)) {
	       if (md5eq(s, l->s) && ORDERED(l->flags, flags)) { 
		    ++ego->succ_lookup;
		    return l; 
	       }
	  } else {
	       return 0;
	  }
	  A((g + d) % ego->hashsiz != h);
     }
}


static void hinsert0(planner *ego, const md5sig s, unsigned short flags,
		     int slvndx, solution *l)
{
     ++ego->insert;
     if (!l) { 	 
	  /* search for nonfull slot */
	  unsigned g, h = h1(ego, s), d = h2(ego, s); 
	  ++ego->insert_unknown;
	  for (g = h; ; g = addmod(g, d, ego->hashsiz)) {
	       ++ego->insert_iter;
	       l = ego->solutions + g;
	       if (!VALIDP(l)) break;
	       A((g + d) % ego->hashsiz != h);
	  }
     }

     /* fill slot */
     l->flags = flags | H_VALID;
     l->slvndx = (short)slvndx;
     sigcpy(s, l->s);
}

static void rehash(planner *ego, unsigned nsiz)
{
     unsigned osiz = ego->hashsiz, h;
     solution *osol = ego->solutions, *nsol;

     nsiz = (unsigned)X(next_prime)((int)nsiz);
     nsol = (solution *)MALLOC(nsiz * sizeof(solution), HASHT);
     ++ego->nrehash;

     /* init new table */
     for (h = 0; h < nsiz; ++h) 
	  nsol[h].flags = 0;

     /* install new table */
     ego->hashsiz = nsiz;
     ego->solutions = nsol;

     /* copy table */
     for (h = 0; h < osiz; ++h) {
	  solution *l = osol + h;
	  if (VALIDP(l))
	       hinsert0(ego, l->s, l->flags, l->slvndx, 0);
     }

     X(ifree0)(osol);
}

static unsigned minsz(unsigned nelem)
{
     return 1U + nelem + nelem / 8U;
}

static unsigned nextsz(unsigned nelem)
{
     return minsz(minsz(nelem));
}

static void hgrow(planner *ego)
{
     unsigned nelem = ego->nelem;
     if (minsz(nelem) >= ego->hashsiz)
	  rehash(ego, nextsz(nelem));
}

static void hshrink(planner *ego)
{
     unsigned nelem = ego->nelem;
     /* always rehash after deletions */
     rehash(ego, nextsz(nelem));
}

/* inherit blessing, but only if the solver is the same */
static unsigned short merge_flags(unsigned short dstflags, int dstndx,
				  unsigned short srcflags, int srcndx)
{
     if (srcndx == dstndx)
	  dstflags |= (srcflags & BLESSING); /* ne me perdas illa die */
     return dstflags;
}

static void hinsert(planner *ego, const md5sig s, 
		    unsigned short flags, int slvndx)
{
     solution *l;

     if ((l = hlookup(ego, s, flags))) {
	  if (SUBSUMES(flags, l->flags)) {
	       /* overwrite old solution */
	       flags = merge_flags(flags, slvndx, l->flags, l->slvndx);
	  } else {
	       A(SUBSUMES(l->flags, flags));
	       l->flags = merge_flags(l->flags, l->slvndx, flags, slvndx);
	       return;
	  }
     } else {
	  ++ego->nelem;
	  hgrow(ego);
     }
     hinsert0(ego, s, flags, slvndx, l);
}

static void hcurse_subsumed(planner *ego)
{
     unsigned h;

     /* unbless any entries that are unreachable because they
        are subsumed by less-impatient ones.  */
     for (h = 0; h < ego->hashsiz; ++h) {
	  solution *l = ego->solutions + h;
	  if (VALIDP(l)) {
	       unsigned d = h2(ego, l->s), g = addmod(h, d, ego->hashsiz);
	       for (; ; g = addmod(g, d, ego->hashsiz)) {
		    solution *m = ego->solutions + g;
		    if (VALIDP(m)) {
			 if (md5eq(l->s, m->s) &&
			     SUBSUMES(l->flags, m->flags)) {
			      /* ne cadant in obscurum */
			      l->flags = merge_flags(l->flags, l->slvndx,
						     m->flags, m->slvndx);

			      /* cum vix justus sit securus */
			      UNBLESS(m->flags);
			 }
		    }
		    else break;
		    A((g + d) % ego->hashsiz != h);
	       }
	  }
     }
}


static void invoke_hook(planner *ego, plan *pln, const problem *p, 
			int optimalp)
{
     if (ego->hook)
	  ego->hook(pln, p, optimalp);
}

static void evaluate_plan(planner *ego, plan *pln, const problem *p)
{
     if (!BELIEVE_PCOSTP(ego) || pln->pcost == 0.0) {
	  ego->nplan++;

	  if (ESTIMATEP(ego)) {
	  estimate:
	       /* heuristic */
	       pln->pcost = 0.0
		    + pln->ops.add
		    + pln->ops.mul
		    + 2 * pln->ops.fma
		    + pln->ops.other;
	       ego->epcost += pln->pcost;
	  } else {
	       double t = X(measure_execution_time)(pln, p);

	       if (t < 0) {  /* unavailable cycle counter */
		    /* Real programmers can write FORTRAN in any language */
		    goto estimate;
	       }

	       pln->pcost = t;
	       ego->pcost += t;
	  }
     }
     
     invoke_hook(ego, pln, p, 0);
}

/* maintain dynamic scoping of flags, nthr: */
static plan *invoke_solver(planner *ego, problem *p, solver *s, 
			   unsigned short nflags)
{
     unsigned short planner_flags = ego->planner_flags;
     unsigned problem_flags = ego->problem_flags;
     int nthr = ego->nthr;
     plan *pln;
     ego->planner_flags = nflags;
     pln = s->adt->mkplan(s, p, ego);
     ego->problem_flags = problem_flags;
     ego->nthr = nthr;
     ego->planner_flags = planner_flags;
     return pln;
}

static plan *search(planner *ego, problem *p, slvdesc **descp)
{
     plan *best = 0;
     int best_not_yet_timed = 1;
     int pass;

     if (NO_SEARCHP(ego)) {
	  /* D("invalid search for %P %x\n", p, ego->planner_flags); */
	  return 0;
     }

     for (pass = 0; pass < 2; ++pass) {
	  unsigned short nflags = ego->planner_flags;
	  
	  if (best) break;

	  switch (pass) {
	      case 0: 
		   /* skip pass 0 during exhaustive search */
		   if (!NO_EXHAUSTIVEP(ego)) continue;
		   nflags |= NO_UGLY;
		   break;
	      case 1:
		   /* skip pass 1 if NO_UGLY */
		   if (NO_UGLYP(ego)) continue;
		   break;
	  }

          FORALL_SOLVERS(ego, s, sp, {
	       plan *pln = invoke_solver(ego, p, s, nflags);

	       if (pln) {
		    if (best) {
			 if (best_not_yet_timed) {
			      evaluate_plan(ego, best, p);
			      best_not_yet_timed = 0;
			 }
			 evaluate_plan(ego, pln, p);
			 if (pln->pcost < best->pcost) {
			      X(plan_destroy_internal)(best);
			      best = pln;
			      *descp = sp;
			 } else {
			      X(plan_destroy_internal)(pln);
			 }
		    } else {
			 best = pln;
			 *descp = sp;
		    }
	       }
	  });
     }

     return best;
}

static plan *mkplan(planner *ego, problem *p)
{
     plan *pln;
     md5 m;
     slvdesc *sp;
     unsigned short flags;
     ASSERT_ALIGNED_DOUBLE;

     /* Canonical form. */
     if (!NO_EXHAUSTIVEP(ego)) ego->planner_flags &= ~NO_UGLY;
	  
     ++ego->nprob;
     md5hash(&m, p, ego);

     pln = 0;

     {
	  solution *sol; /* new scope for sol */

	  if ((sol = hlookup(ego, m.s, ego->planner_flags))) {
	       if (SUBSUMES(sol->flags, ego->planner_flags)) {
		    /* wisdom is acceptable */
		    if (sol->slvndx < 0) 
			 return 0;   /* known to be infeasible */

		    /* use solver to obtain a plan */
		    sp = ego->slvdescs + sol->slvndx;
		    pln = 
			 invoke_solver(ego, p, sp->slv, 
				       (0
					| NO_SEARCH 
					| IMPATIENCE(sol->flags)
					| NONIMPATIENCE(ego->planner_flags) ));

		    /* if (!pln) then the entry is bogus, but
		       we currently do nothing about it. */
		    /* CAVEAS: Do not use ``sol'' here, because the
		       pointer is possibly dangling after the call to
		       invoke_solver(). */
	       } else {
		    A(SUBSUMES(ego->planner_flags, sol->flags));
	       }
	  }
     }


     if (!pln)
	  pln = search(ego, p, &sp);

     flags = ego->planner_flags;

     if (pln) {
	  /* Postulate de iure that NO_UGLY subsumes ~NO_UGLY if the
	     problem is feasible. Also postulate that NO_SEARCH
	     subsumes ~NO_SEARCH. */
	  flags &= ~(NO_UGLY | NO_SEARCH);
     }

     hinsert(ego, m.s, flags, pln ? sp - ego->slvdescs : -1);

     if (pln)
	  invoke_hook(ego, pln, p, 1);
     return pln;
}

/* destroy hash table entries.  If FORGET_EVERYTHING, destroy the whole
   table.  If FORGET_ACCURSED, then destroy entries that are not blessed. */
static void forget(planner *ego, amnesia a)
{
     unsigned h;

     /* garbage-collect while we are at it */ 
     if (a != FORGET_EVERYTHING)
	  hcurse_subsumed(ego);

     for (h = 0; h < ego->hashsiz; ++h) {
	  solution *l = ego->solutions + h;
	  if (VALIDP(l)) {
	       if (a == FORGET_EVERYTHING ||
		   (a == FORGET_ACCURSED && !BLESSEDP(l))) {
		    /* confutatis maledictis
		       flammis acribus addictis */
		    l->flags &= ~H_VALID;
		    --ego->nelem;
	       }
	  }
     }
     /* nil inultum remanebit */

     hshrink(ego);
}

static void htab_destroy(planner *ego)
{
     forget(ego, FORGET_EVERYTHING);
     X(ifree)(ego->solutions);
     ego->nelem = 0U;
}

/* FIXME: what sort of version information should we write? */
#define WISDOM_PREAMBLE PACKAGE "-" VERSION " " STRINGIZE(X(wisdom))

/* tantus labor non sit cassus */
static void exprt(planner *ego, printer *p)
{
     unsigned h;

     hcurse_subsumed(ego);

     p->print(p, "(" WISDOM_PREAMBLE "%(");
     for (h = 0; h < ego->hashsiz; ++h) {
	  solution *l = ego->solutions + h;
	  if (VALIDP(l) && BLESSEDP(l) && l->slvndx >= 0) {
	       slvdesc *sp = ego->slvdescs + l->slvndx;
	       /* qui salvandos salvas gratis
		  salva me fons pietatis */
	       p->print(p, "(%s %d #x%x #x%M #x%M #x%M #x%M)\n",
			sp->reg_nam, sp->reg_id, (int)l->flags,
			l->s[0], l->s[1], l->s[2], l->s[3]);
	  }
     }
     p->print(p, "%))\n");
}

/* mors stupebit et natura
   cum resurget creatura */
static int imprt(planner *ego, scanner *sc)
{
     char buf[MAXNAM + 1];
     md5uint sig[4];
     int flags;
     int reg_id;
     int slvndx;
     solution *sol;

     if (!sc->scan(sc, "(" WISDOM_PREAMBLE))
	  return 0; /* don't need to restore hashtable */

     /* make a backup copy of the hash table (cache the hash) */
     {
	  unsigned h, hsiz = ego->hashsiz;
	  sol = (solution *)MALLOC(hsiz * sizeof(solution), HASHT);
	  for (h = 0; h < hsiz; ++h)
	       sol[h] = ego->solutions[h];
     }

     while (1) {
	  if (sc->scan(sc, ")"))
	       break;

	  /* qua resurget ex favilla */
	  if (!sc->scan(sc, "(%*s %d #x%x #x%M #x%M #x%M #x%M)",
			MAXNAM, buf, &reg_id, &flags, 
			sig + 0, sig + 1, sig + 2, sig + 3))
	       goto bad;

	  if ((slvndx = slookup(ego, buf, reg_id)) < 0)
	       goto bad;

	  /* inter oves locum praesta */
	  hinsert(ego, sig, (unsigned short)flags, slvndx);
     }

     X(ifree0)(sol);
     return 1;

 bad:
     /* ``The wisdom of FFTW must be above suspicion.'' */
     X(ifree0)(ego->solutions);
     ego->solutions = sol;
     return 0;
}

/*
 * create a planner
 */
planner *X(mkplanner)(void)
{
     static const planner_adt padt = {
	  register_solver, mkplan, forget, exprt, imprt
     };

     planner *p = (planner *) MALLOC(sizeof(planner), PLANNERS);

     p->adt = &padt;
     p->nplan = p->nprob = p->nrehash = 0;
     p->pcost = p->epcost = 0.0;
     p->succ_lookup = p->lookup = p->lookup_iter = 0;
     p->insert = p->insert_iter = p->insert_unknown = 0;
     p->hook = 0;
     p->cur_reg_nam = 0;

     p->slvdescs = 0;
     p->nslvdesc = p->slvdescsiz = 0;

     p->solutions = 0;
     p->hashsiz = p->nelem = 0U;

     p->problem_flags = 0;
     p->planner_flags = 0;
     p->nthr = 1;

     hgrow(p);			/* so that hashsiz > 0 */

     return p;
}

void X(planner_destroy)(planner *ego)
{
     /* destroy hash table */
     htab_destroy(ego);

     /* destroy solvdesc table */
     FORALL_SOLVERS(ego, s, sp, {
	  UNUSED(sp);
	  X(solver_destroy)(s);
     });

     X(ifree0)(ego->slvdescs);
     X(ifree)(ego); /* dona eis requiem */
}

plan *X(mkplan_d)(planner *ego, problem *p)
{
     plan *pln = ego->adt->mkplan(ego, p);
     X(problem_destroy)(p);
     return pln;
}

/*
 * Debugging code:
 */
#ifdef FFTW_DEBUG

void X(planner_dump)(planner *ego, int verbose)
{
     unsigned valid = 0, empty = 0, infeasible = 0;
     unsigned h;
     UNUSED(verbose); /* historical */

     for (h = 0; h < ego->hashsiz; ++h) {
	  solution *l = ego->solutions + h; 
	  if (VALIDP(l)) {
	       ++valid; 
	       if (l->slvndx < 0) ++infeasible;
	  } else
	       ++empty;
	  
     }

     D("nplan = %d\n", ego->nplan);
     D("nprob = %d\n", ego->nprob);
     D("pcost = %g\n", ego->pcost);
     D("epcost = %g\n", ego->epcost);
     D("lookup = %d\n", ego->lookup);
     D("succ_lookup = %d\n", ego->succ_lookup);
     D("lookup_iter = %d\n", ego->lookup_iter);
     D("insert = %d\n", ego->insert);
     D("insert_iter = %d\n", ego->insert_iter);
     D("insert_unknown = %d\n", ego->insert_unknown);
     D("nrehash = %d\n", ego->nrehash);
     D("hashsiz = %u\n", ego->hashsiz);
     D("empty = %d\n", empty);
     D("valid = %d\n", valid);
     D("infeasible = %d\n", infeasible);
     A(ego->nelem == valid);
}

#endif