/*
 * Copyright (c) 2003 Matteo Frigo
 * Copyright (c) 2003 Massachusetts Institute of Technology
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 */

/* This file was automatically generated --- DO NOT EDIT */
/* Generated on Sat Jul  5 21:57:27 EDT 2003 */

#include "codelet-rdft.h"

/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2hc -compact -variables 4 -twiddle-log3 -n 8 -dit -name hf2_8 -include hf.h */

/*
 * This function contains 74 FP additions, 44 FP multiplications,
 * (or, 56 additions, 26 multiplications, 18 fused multiply/add),
 * 50 stack variables, and 32 memory accesses
 */
/*
 * Generator Id's : 
 * $Id: hf2_8.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
 * $Id: hf2_8.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
 * $Id: hf2_8.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
 */

#include "hf.h"

/*
 * hf2_8 -- unrolled size-8 butterfly applied in place to the rio/iio
 * arrays, once per loop iteration (generated code; see the gen_hc2hc
 * command line earlier in this file: -n 8 -dit -twiddle-log3).
 *
 * Parameters:
 *   rio   accessed at offsets +WS(ios, k), k = 0..7; read, then overwritten.
 *   iio   accessed at offsets -WS(ios, k), k = 0..7; read, then overwritten.
 *   W     twiddle table; W[0..5] are read in every iteration.
 *   ios   stride handed to WS() to form the element offsets.
 *   m     controls the trip count: i starts at m - 2 and drops by 2 while
 *         i > 0, so nothing is executed when m <= 2.
 *   dist  per-iteration step: rio advances by dist, iio retreats by dist.
 *
 * Returns W advanced by 6 entries for every iteration that ran.
 *
 * NOTE(review): the comments below assume FMA(a, b, c) == a * b + c and
 * FNMS(a, b, c) == c - a * b, and that E/R are the floating-point types
 * from the codelet headers -- none of these are defined in this file;
 * confirm against codelet-rdft.h / hf.h.
 */
static const R *hf2_8(R *rio, R *iio, const R *W, stride ios, int m, int dist)
{
     /* Named constant 0.7071... (= sqrt(1/2)), used to scale the odd outputs. */
     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
     int i;
     for (i = m - 2; i > 0; i = i - 2, rio = rio + dist, iio = iio - dist, W = W + 6) {
	  /* Values that must outlive the load/twiddle block below. */
	  E T1, T1c, TJ, T17, TY, TV, TR, Tk, Tr, TN, TM, Tw, TB, TS, Te;
	  E T1b;
	  /* The pair (rio[0], iio[-WS(ios, 7)]) is used without a twiddle factor. */
	  T1 = rio[0];
	  T1c = iio[-WS(ios, 7)];
	  /*
	   * Load phase: all 16 input values are read here, before any store,
	   * which is what makes the in-place update safe.  Each input pair is
	   * (rio[WS(ios, k)], iio[-WS(ios, 7 - k)]) and is combined with one
	   * coefficient pair through an FMA/FNMS couple.
	   */
	  {
	       E T9, T3, T6, T4, Tb, T7, Ta, Tg, Ti, TI, TX, Tl, Tm, Tn, TW;
	       E Tp, TF, Td, TA, Th, Ty, Tj, Tv, To, Tq, Tu;
	       T9 = rio[WS(ios, 4)];
	       {
		    E TD, TH, TE, TG, T2, T5;
		    TD = rio[WS(ios, 7)];
		    TH = iio[-WS(ios, 4)];
		    TE = iio[0];
		    TG = rio[WS(ios, 3)];
		    /*
		     * Only three coefficient pairs are stored in W: (W[0], W[1]),
		     * (W[2], W[3]) and (W[4], W[5]).  The other four pairs used
		     * below are built from products of these.
		     */
		    T2 = W[2];
		    T5 = W[3];
		    T3 = W[0];
		    T6 = W[1];
		    /* Cross products of pair (T2, T5) with pair (T3, T6). */
		    T4 = T2 * T3;
		    Tb = T5 * T3;
		    T7 = T5 * T6;
		    Ta = T2 * T6;
		    /* Derived pair (Tg, Ti), applied to inputs 2 / -5 below. */
		    Tg = T4 + T7;
		    Ti = Ta - Tb;
		    /* Inputs 3 / -4 combined with the stored pair (W[2], W[3]). */
		    TI = FMA(T2, TG, T5 * TH);
		    TX = FNMS(T5, TG, T2 * TH);
		    Tl = W[4];
		    Tm = W[5];
		    /* Derived pair (Tn, Tp), applied to inputs 6 / -1 below. */
		    Tn = FMA(Tl, T3, Tm * T6);
		    /* Inputs 7 / 0 combined with the stored pair (W[4], W[5]). */
		    TW = FNMS(Tm, TD, Tl * TE);
		    Tp = FNMS(Tm, T3, Tl * T6);
		    TF = FMA(Tl, TD, Tm * TE);
	       }
	       /* Remaining raw loads. */
	       Td = iio[-WS(ios, 3)];
	       TA = iio[-WS(ios, 2)];
	       Th = rio[WS(ios, 2)];
	       Ty = rio[WS(ios, 5)];
	       Tj = iio[-WS(ios, 5)];
	       Tv = iio[-WS(ios, 6)];
	       To = rio[WS(ios, 6)];
	       Tq = iio[-WS(ios, 1)];
	       Tu = rio[WS(ios, 1)];
	       /* Sums and differences of the twiddled 7/0 and 3/-4 pairs. */
	       TJ = TF + TI;
	       T17 = TW + TX;
	       TY = TW - TX;
	       TV = TF - TI;
	       /* Inputs 1 / -6 with the stored pair (W[0], W[1]): results Tw, TR. */
	       TR = FNMS(T6, Tu, T3 * Tv);
	       /* Inputs 2 / -5 with the derived pair (Tg, Ti): results Tk, TM. */
	       Tk = FNMS(Ti, Tj, Tg * Th);
	       /* Inputs 6 / -1 with the derived pair (Tn, Tp): results Tr, TN. */
	       Tr = FNMS(Tp, Tq, Tn * To);
	       TN = FMA(Tp, To, Tn * Tq);
	       TM = FMA(Ti, Th, Tg * Tj);
	       Tw = FMA(T3, Tu, T6 * Tv);
	       {
		    E Tx, Tz, T8, Tc;
		    /* Derived pair (Tx, Tz) from (W[4], W[5]) and (Tg, Ti); used on inputs 5 / -2. */
		    Tx = FNMS(Tm, Ti, Tl * Tg);
		    Tz = FMA(Tl, Ti, Tm * Tg);
		    TB = FMA(Tx, Ty, Tz * TA);
		    TS = FNMS(Tz, Ty, Tx * TA);
		    /* Derived pair (T8, Tc) from the same cross products; used on inputs 4 / -3. */
		    T8 = T4 - T7;
		    Tc = Ta + Tb;
		    Te = FMA(T8, T9, Tc * Td);
		    T1b = FNMS(Tc, T9, T8 * Td);
	       }
	  }
	  /*
	   * First store group: writes offsets 0, 2, 4 and 6 of both rio and
	   * iio.  These outputs are pure sums/differences (no scaling).
	   */
	  {
	       E TK, T1f, T18, T19, Tt, T15, T1e, T1g, TC, T16;
	       TC = Tw + TB;
	       TK = TC + TJ;
	       T1f = TJ - TC;
	       T16 = TR + TS;
	       T18 = T16 - T17;
	       T19 = T16 + T17;
	       {
		    E Tf, Ts, T1a, T1d;
		    Tf = T1 + Te;
		    Ts = Tk + Tr;
		    Tt = Tf + Ts;
		    T15 = Tf - Ts;
		    T1a = TM + TN;
		    T1d = T1b + T1c;
		    T1e = T1a + T1d;
		    T1g = T1d - T1a;
	       }
	       iio[-WS(ios, 4)] = Tt - TK;
	       rio[WS(ios, 4)] = T19 - T1e;
	       rio[0] = Tt + TK;
	       iio[0] = T19 + T1e;
	       iio[-WS(ios, 6)] = T15 - T18;
	       rio[WS(ios, 6)] = T1f - T1g;
	       rio[WS(ios, 2)] = T15 + T18;
	       iio[-WS(ios, 2)] = T1f + T1g;
	  }
	  /*
	   * Second store group: writes offsets 1, 3, 5 and 7 of both rio and
	   * iio.  Each output combines one unscaled term with one term scaled
	   * by KP707106781.
	   */
	  {
	       E TZ, T13, TP, T11, TU, T12, T1k, T1m, TL, TO;
	       TZ = TV - TY;
	       T13 = TV + TY;
	       TL = T1 - Te;
	       TO = TM - TN;
	       TP = TL + TO;
	       T11 = TL - TO;
	       {
		    E TQ, TT, T1i, T1j;
		    TQ = Tw - TB;
		    TT = TR - TS;
		    TU = TQ + TT;
		    T12 = TT - TQ;
		    T1i = T1c - T1b;
		    T1j = Tk - Tr;
		    T1k = T1i - T1j;
		    T1m = T1j + T1i;
	       }
	       {
		    E T10, T1h, T14, T1l;
		    T10 = KP707106781 * (TU + TZ);
		    iio[-WS(ios, 5)] = TP - T10;
		    rio[WS(ios, 1)] = TP + T10;
		    T1h = KP707106781 * (T12 + T13);
		    rio[WS(ios, 5)] = T1h - T1k;
		    iio[-WS(ios, 1)] = T1h + T1k;
		    T14 = KP707106781 * (T12 - T13);
		    iio[-WS(ios, 7)] = T11 - T14;
		    rio[WS(ios, 3)] = T11 + T14;
		    T1l = KP707106781 * (TZ - TU);
		    rio[WS(ios, 7)] = T1l - T1m;
		    iio[-WS(ios, 3)] = T1l + T1m;
	       }
	  }
     }
     /* W has been advanced by 6 per iteration; hand the new position back. */
     return W;
}

/*
 * Twiddle instruction table referenced by desc below.  It lists three
 * TW_COS/TW_SIN pairs whose third field is 1, 3 and 7, then a TW_NEXT
 * terminator.  The six COS/SIN entries line up with the six values
 * W[0..5] that hf2_8 reads per loop iteration.
 * NOTE(review): the third field is presumably the twiddle exponent and
 * the TW_NEXT entry presumably advances to the next table row -- confirm
 * against the tw_instr definition in the codelet headers.
 */
static const tw_instr twinstr[] = {
     {TW_COS, 0, 1},
     {TW_SIN, 0, 1},
     {TW_COS, 0, 3},
     {TW_SIN, 0, 3},
     {TW_COS, 0, 7},
     {TW_SIN, 0, 7},
     {TW_NEXT, 1, 0}
};

/*
 * Codelet descriptor handed to the planner at registration: 8 (the -n 8
 * from the generator command line), the codelet name, its twiddle
 * instruction table, and {56, 26, 18, 0}, which matches the operation
 * counts in the header comment of this file (56 additions, 26
 * multiplications, 18 fused multiply/adds).
 * NOTE(review): the meaning of the fourth count, of GENUS and of the
 * three trailing zeros is not visible in this file -- see the hc2hc_desc
 * definition.
 */
static const hc2hc_desc desc = { 8, "hf2_8", twinstr, {56, 26, 18, 0}, &GENUS, 0, 0, 0 };

/*
 * Registration entry point: passes the hf2_8 kernel together with its
 * descriptor to the planner p via X(khc2hc_dit_register).
 */
void X(codelet_hf2_8)(planner *p)
{
     const hc2hc_desc *codelet_desc = &desc;

     X(khc2hc_dit_register)(p, hf2_8, codelet_desc);
}