summary | refs | log | tree | commit | diff
path: root/src/fftw3/dft/codelets/standard/t2_8.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/fftw3/dft/codelets/standard/t2_8.c')
-rw-r--r-- src/fftw3/dft/codelets/standard/t2_8.c | 192
1 files changed, 192 insertions, 0 deletions
diff --git a/src/fftw3/dft/codelets/standard/t2_8.c b/src/fftw3/dft/codelets/standard/t2_8.c
new file mode 100644
index 0000000..d9aec9a
--- /dev/null
+++ b/src/fftw3/dft/codelets/standard/t2_8.c
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul 5 21:30:08 EDT 2003 */
+
+#include "codelet-dft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_twiddle -compact -variables 4 -twiddle-log3 -n 8 -name t2_8 -include t.h */
+
+/*
+ * This function contains 74 FP additions, 44 FP multiplications,
+ * (or, 56 additions, 26 multiplications, 18 fused multiply/add),
+ * 50 stack variables, and 32 memory accesses
+ */
+/*
+ * Generator Id's :
+ * $Id: t2_8.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: t2_8.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: t2_8.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ */
+
+#include "t.h"
+
+static const R *t2_8(R *ri, R *ii, const R *W, stride ios, int m, int dist) /* 8-point twiddle step done in place on ri/ii, repeated m times; returns W advanced past every entry consumed. */
+{
+ DK(KP707106781, +0.707106781186547524400844362104849039284835938); /* numerically 1/sqrt(2); the DK macro is defined outside this file */
+ int i;
+ for (i = m; i > 0; i = i - 1, ri = ri + dist, ii = ii + dist, W = W + 6) { /* each pass handles one set of 8 points, then steps ri/ii by dist and W by 6 */
+ E T1, T1c, TJ, T17, TY, TV, TR, Tk, Tr, TN, TM, Tw, TB, TS, Te; /* values that must survive past the load/twiddle stage */
+ E T1b;
+ T1 = ri[0]; /* point 0 is used as loaded: nothing from W is applied to it */
+ T1c = ii[0];
+ { /* stage 1: load points 1..7 and combine each with a factor pair taken or derived from W */
+ E T9, Td, Th, Tj, To, Tq, Ty, TA, Tv, Tu, T3, T6, T4, Tb, T7;
+ E Ta, Tg, Ti, TI, TX, Tl, Tm, Tn, TW, Tp, TF;
+ T9 = ri[WS(ios, 4)]; /* raw ri/ii values of points 4, 2, 6, 5 and 1 follow */
+ Td = ii[WS(ios, 4)];
+ Th = ri[WS(ios, 2)];
+ Tj = ii[WS(ios, 2)];
+ To = ri[WS(ios, 6)];
+ Tq = ii[WS(ios, 6)];
+ Ty = ri[WS(ios, 5)];
+ TA = ii[WS(ios, 5)];
+ Tv = ii[WS(ios, 1)];
+ Tu = ri[WS(ios, 1)];
+ {
+ E TD, TE, TG, TH, T2, T5;
+ TD = ri[WS(ios, 7)]; /* raw ri/ii values of points 7 and 3 */
+ TE = ii[WS(ios, 7)];
+ TG = ri[WS(ios, 3)];
+ TH = ii[WS(ios, 3)];
+ T2 = W[2]; /* (W[2], W[3]): pair applied directly to point 3 */
+ T5 = W[3];
+ T3 = W[0]; /* (W[0], W[1]): pair applied directly to point 1 */
+ T6 = W[1];
+ T4 = T2 * T3; /* the four cross products of the two pairs above; only three pairs are read from W, the rest are derived */
+ Tb = T5 * T3;
+ T7 = T5 * T6;
+ Ta = T2 * T6;
+ Tg = T4 + T7; /* (Tg, Ti): derived pair, applied to point 2 below */
+ Ti = Ta - Tb;
+ TI = FMA(T2, TG, T5 * TH); /* (TI, TX): point 3 combined with (W[2], W[3]). NOTE(review): FMA/FNMS are macros defined outside this file; presumably a*b+c and c-a*b -- confirm */
+ TX = FNMS(T5, TG, T2 * TH);
+ Tl = W[4]; /* (W[4], W[5]): pair applied directly to point 7 */
+ Tm = W[5];
+ Tn = FMA(Tl, T3, Tm * T6); /* (Tn, Tp): derived from (W[4], W[5]) and (W[0], W[1]); applied to point 6 below */
+ TW = FNMS(Tm, TD, Tl * TE); /* (TF, TW): point 7 combined with (W[4], W[5]) */
+ Tp = FNMS(Tm, T3, Tl * T6);
+ TF = FMA(Tl, TD, Tm * TE);
+ }
+ TJ = TF + TI; /* sums and differences of the processed points 7 and 3 */
+ T17 = TW + TX;
+ TY = TW - TX;
+ TV = TF - TI;
+ TR = FNMS(T6, Tu, T3 * Tv); /* (Tw, TR): point 1 combined with (W[0], W[1]); Tw is assigned a few lines below */
+ Tk = FNMS(Ti, Tj, Tg * Th); /* (Tk, TM): point 2 combined with (Tg, Ti) */
+ Tr = FNMS(Tp, Tq, Tn * To); /* (Tr, TN): point 6 combined with (Tn, Tp) */
+ TN = FMA(Tp, To, Tn * Tq);
+ TM = FMA(Ti, Th, Tg * Tj);
+ Tw = FMA(T3, Tu, T6 * Tv);
+ {
+ E Tx, Tz, T8, Tc;
+ Tx = FNMS(Tm, Ti, Tl * Tg); /* (Tx, Tz): derived from (W[4], W[5]) and (Tg, Ti); applied to point 5 */
+ Tz = FMA(Tl, Ti, Tm * Tg);
+ TB = FMA(Tx, Ty, Tz * TA); /* (TB, TS): point 5 combined with (Tx, Tz) */
+ TS = FNMS(Tz, Ty, Tx * TA);
+ T8 = T4 - T7; /* (T8, Tc): derived pair, applied to point 4 */
+ Tc = Ta + Tb;
+ Te = FMA(T8, T9, Tc * Td); /* (Te, T1b): point 4 combined with (T8, Tc) */
+ T1b = FNMS(Tc, T9, T8 * Td);
+ }
+ }
+ { /* stage 2a: outputs 0, 2, 4 and 6, formed by additions and subtractions only */
+ E TK, T1f, T18, T19, Tt, T15, T1e, T1g, TC, T16;
+ TC = Tw + TB;
+ TK = TC + TJ;
+ T1f = TJ - TC;
+ T16 = TR + TS;
+ T18 = T16 - T17;
+ T19 = T16 + T17;
+ {
+ E Tf, Ts, T1a, T1d;
+ Tf = T1 + Te;
+ Ts = Tk + Tr;
+ Tt = Tf + Ts;
+ T15 = Tf - Ts;
+ T1a = TM + TN;
+ T1d = T1b + T1c;
+ T1e = T1a + T1d;
+ T1g = T1d - T1a;
+ }
+ ri[WS(ios, 4)] = Tt - TK; /* results overwrite the input slots; every input was already copied into a local above */
+ ii[WS(ios, 4)] = T1e - T19;
+ ri[0] = Tt + TK;
+ ii[0] = T19 + T1e;
+ ri[WS(ios, 6)] = T15 - T18;
+ ii[WS(ios, 6)] = T1g - T1f;
+ ri[WS(ios, 2)] = T15 + T18;
+ ii[WS(ios, 2)] = T1f + T1g;
+ }
+ { /* stage 2b: outputs 1, 3, 5 and 7, each of which includes one term scaled by KP707106781 */
+ E TZ, T13, TP, T11, TU, T12, T1k, T1m, TL, TO;
+ TZ = TV - TY;
+ T13 = TV + TY;
+ TL = T1 - Te;
+ TO = TM - TN;
+ TP = TL + TO;
+ T11 = TL - TO;
+ {
+ E TQ, TT, T1i, T1j;
+ TQ = Tw - TB;
+ TT = TR - TS;
+ TU = TQ + TT;
+ T12 = TT - TQ;
+ T1i = T1c - T1b;
+ T1j = Tk - Tr;
+ T1k = T1i - T1j;
+ T1m = T1j + T1i;
+ }
+ {
+ E T10, T1h, T14, T1l;
+ T10 = KP707106781 * (TU + TZ); /* each scaled term is computed once and used with both signs */
+ ri[WS(ios, 5)] = TP - T10;
+ ri[WS(ios, 1)] = TP + T10;
+ T1h = KP707106781 * (T12 + T13);
+ ii[WS(ios, 1)] = T1h + T1k;
+ ii[WS(ios, 5)] = T1k - T1h;
+ T14 = KP707106781 * (T12 - T13);
+ ri[WS(ios, 7)] = T11 - T14;
+ ri[WS(ios, 3)] = T11 + T14;
+ T1l = KP707106781 * (TZ - TU);
+ ii[WS(ios, 3)] = T1l + T1m;
+ ii[WS(ios, 7)] = T1m - T1l;
+ }
+ }
+ }
+ return W; /* W has been advanced by 6 for every iteration executed */
+}
+
+static const tw_instr twinstr[] = { /* Twiddle-table recipe referenced by desc: six value entries, matching the six W reads and the W + 6 step in t2_8. NOTE(review): presumably entry k describes W[k] -- confirm. */
+ {TW_COS, 0, 1}, /* cosine/sine pair with last field 1 */
+ {TW_SIN, 0, 1},
+ {TW_COS, 0, 3}, /* cosine/sine pair with last field 3 */
+ {TW_SIN, 0, 3},
+ {TW_COS, 0, 7}, /* cosine/sine pair with last field 7 */
+ {TW_SIN, 0, 7},
+ {TW_NEXT, 1, 0} /* NOTE(review): presumably marks the end of one step's entries; tw_instr semantics are defined outside this file */
+};
+
+static const ct_desc desc = { 8, "t2_8", twinstr, {56, 26, 18, 0}, &GENUS, 0, 0, 0 }; /* Descriptor registered together with t2_8: carries the name "t2_8" and the twinstr table; 56/26/18 repeat the add/mul/fused counts quoted in the header comment. NOTE(review): 8 is presumably the transform size and the trailing zeros unused options -- confirm against ct_desc. */
+
+void X(codelet_t2_8) (planner *p) { /* Registration entry point: hands t2_8 and its descriptor to planner p. */
+ X(kdft_dit_register) (p, t2_8, &desc); /* X() wraps both identifiers; the macro is defined outside this file */
+}