From 5a422aba704c375a307a902bafe658342e209906 Mon Sep 17 00:00:00 2001 From: scuri Date: Fri, 17 Oct 2008 06:10:15 +0000 Subject: First commit - moving from LuaForge to SourceForge --- src/fftw3/dft/codelets/standard/t2_8.c | 192 +++++++++++++++++++++++++++++++++ 1 file changed, 192 insertions(+) create mode 100644 src/fftw3/dft/codelets/standard/t2_8.c (limited to 'src/fftw3/dft/codelets/standard/t2_8.c') diff --git a/src/fftw3/dft/codelets/standard/t2_8.c b/src/fftw3/dft/codelets/standard/t2_8.c new file mode 100644 index 0000000..d9aec9a --- /dev/null +++ b/src/fftw3/dft/codelets/standard/t2_8.c @@ -0,0 +1,192 @@ +/* + * Copyright (c) 2003 Matteo Frigo + * Copyright (c) 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Sat Jul 5 21:30:08 EDT 2003 */ + +#include "codelet-dft.h" + +/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_twiddle -compact -variables 4 -twiddle-log3 -n 8 -name t2_8 -include t.h */ + +/* + * This function contains 74 FP additions, 44 FP multiplications, + * (or, 56 additions, 26 multiplications, 18 fused multiply/add), + * 50 stack variables, and 32 memory accesses + */ +/* + * Generator Id's : + * $Id: t2_8.c,v 1.1 2008/10/17 06:11:09 scuri Exp $ + * $Id: t2_8.c,v 1.1 2008/10/17 06:11:09 scuri Exp $ + * $Id: t2_8.c,v 1.1 2008/10/17 06:11:09 scuri Exp $ + */ + +#include "t.h" + +/* t2_8: in-place size-8 twiddle codelet on split real (ri) and imaginary (ii) arrays. Each of the m loop passes loads the eight points ri/ii[WS(ios, k)], k = 0..7, combines points 1..7 with factors taken from or derived from W[0..5], and writes the eight results back to the same locations; ri and ii then step by dist and W by 6. Returns W after the last step. NOTE(review): R, E, DK, WS, FMA and FNMS come from the included headers; the comments here presume FMA(a, b, c) = a*b + c and FNMS(a, b, c) = c - a*b -- confirm there. */ static const R *t2_8(R *ri, R *ii, const R *W, stride ios, int m, int dist) +{ + DK(KP707106781, +0.707106781186547524400844362104849039284835938); /* 1/sqrt(2); only used to scale the terms of outputs 1, 3, 5 and 7 */ + int i; + for (i = m; i > 0; i = i - 1, ri = ri + dist, ii = ii + dist, W = W + 6) { /* one 8-point butterfly per pass; nothing runs when m <= 0 */ + E T1, T1c, TJ, T17, TY, TV, TR, Tk, Tr, TN, TM, Tw, TB, TS, Te; + E T1b; + T1 = ri[0]; + T1c = ii[0]; /* point 0 enters the sums below as loaded, with no factor applied */ + { + E T9, Td, Th, Tj, To, Tq, Ty, TA, Tv, Tu, T3, T6, T4, Tb, T7; + E Ta, Tg, Ti, TI, TX, Tl, Tm, Tn, TW, Tp, TF; + T9 = ri[WS(ios, 4)]; + Td = ii[WS(ios, 4)]; + Th = ri[WS(ios, 2)]; + Tj = ii[WS(ios, 2)]; + To = ri[WS(ios, 6)]; + Tq = ii[WS(ios, 6)]; + Ty = ri[WS(ios, 5)]; + TA = ii[WS(ios, 5)]; + Tv = ii[WS(ios, 1)]; + Tu = ri[WS(ios, 1)]; + { + E TD, TE, TG, TH, T2, T5; + TD = ri[WS(ios, 7)]; + TE = ii[WS(ios, 7)]; + TG = ri[WS(ios, 3)]; + TH = ii[WS(ios, 3)]; + /* W[0..5] are the only twiddle values read per pass: (T3, T6), (T2, T5) and (Tl, Tm) */ T2 = W[2]; + T5 = W[3]; + T3 = W[0]; + T6 = W[1]; + /* cross products of the (T3, T6) and (T2, T5) pairs: they form the factor (Tg, Ti) for point 2 here and (T8, Tc) for point 4 further down */ T4 = T2 * T3; + Tb = T5 * T3; + T7 = T5 * T6; + Ta = T2 * T6; + Tg = T4 + T7; + Ti = Ta - Tb; + TI = FMA(T2, TG, T5 * TH); /* (TI, TX): point 3 combined with (T2, T5) */ + TX = FNMS(T5, TG, T2 * TH); + Tl = W[4]; + Tm = W[5]; + Tn = FMA(Tl, T3, Tm * T6); /* (Tn, Tp): factor for point 6, built from (Tl, Tm) and (T3, T6) */ + TW = FNMS(Tm, TD, Tl * 
TE); /* (TF, TW): point 7 combined with (Tl, Tm) */ + Tp = FNMS(Tm, T3, Tl * T6); + TF = FMA(Tl, TD, Tm * TE); + } + TJ = TF + TI; + T17 = TW + TX; + TY = TW - TX; + TV = TF - TI; + TR = FNMS(T6, Tu, T3 * Tv); /* (Tw, TR): point 1 combined with (T3, T6) */ + Tk = FNMS(Ti, Tj, Tg * Th); /* (Tk, TM): point 2 combined with (Tg, Ti) */ + Tr = FNMS(Tp, Tq, Tn * To); /* (Tr, TN): point 6 combined with (Tn, Tp) */ + TN = FMA(Tp, To, Tn * Tq); + TM = FMA(Ti, Th, Tg * Tj); + Tw = FMA(T3, Tu, T6 * Tv); + { + E Tx, Tz, T8, Tc; + Tx = FNMS(Tm, Ti, Tl * Tg); /* (Tx, Tz): factor for point 5, built from (Tl, Tm) and (Tg, Ti) */ + Tz = FMA(Tl, Ti, Tm * Tg); + TB = FMA(Tx, Ty, Tz * TA); + TS = FNMS(Tz, Ty, Tx * TA); + T8 = T4 - T7; /* (T8, Tc): factor for point 4, from the cross products computed earlier */ + Tc = Ta + Tb; + Te = FMA(T8, T9, Tc * Td); + T1b = FNMS(Tc, T9, T8 * Td); + } + } + { /* outputs 0, 2, 4 and 6: sums and differences only, no scaling constant */ + E TK, T1f, T18, T19, Tt, T15, T1e, T1g, TC, T16; + TC = Tw + TB; + TK = TC + TJ; + T1f = TJ - TC; + T16 = TR + TS; + T18 = T16 - T17; + T19 = T16 + T17; + { + E Tf, Ts, T1a, T1d; + Tf = T1 + Te; + Ts = Tk + Tr; + Tt = Tf + Ts; + T15 = Tf - Ts; + T1a = TM + TN; + T1d = T1b + T1c; + T1e = T1a + T1d; + T1g = T1d - T1a; + } + ri[WS(ios, 4)] = Tt - TK; + ii[WS(ios, 4)] = T1e - T19; + ri[0] = Tt + TK; + ii[0] = T19 + T1e; + ri[WS(ios, 6)] = T15 - T18; + ii[WS(ios, 6)] = T1g - T1f; + ri[WS(ios, 2)] = T15 + T18; + ii[WS(ios, 2)] = T1f + T1g; + } + { /* outputs 1, 3, 5 and 7: each adds or subtracts one KP707106781-scaled term */ + E TZ, T13, TP, T11, TU, T12, T1k, T1m, TL, TO; + TZ = TV - TY; + T13 = TV + TY; + TL = T1 - Te; + TO = TM - TN; + TP = TL + TO; + T11 = TL - TO; + { + E TQ, TT, T1i, T1j; + TQ = Tw - TB; + TT = TR - TS; + TU = TQ + TT; + T12 = TT - TQ; + T1i = T1c - T1b; + T1j = Tk - Tr; + T1k = T1i - T1j; + T1m = T1j + T1i; + } + { + E T10, T1h, T14, T1l; + T10 = KP707106781 * (TU + TZ); + ri[WS(ios, 5)] = TP - T10; + ri[WS(ios, 1)] = TP + T10; + T1h = KP707106781 * (T12 + T13); + ii[WS(ios, 1)] = T1h + T1k; + ii[WS(ios, 5)] = T1k - T1h; + T14 = KP707106781 * (T12 - T13); + ri[WS(ios, 7)] = T11 - T14; + ri[WS(ios, 3)] = T11 + T14; + T1l = KP707106781 * (TZ - TU); + ii[WS(ios, 3)] = T1l + T1m; + ii[WS(ios, 7)] = T1m - T1l; + } + } + } + return W; /* W as left by the loop: the incoming pointer plus 6 per executed pass */ +} + +/* Twiddle instruction table handed to the descriptor: TW_COS/TW_SIN pairs tagged 1, 3 and 7 (six entries, the same count as the W + 6 step in t2_8), closed by a TW_NEXT entry. NOTE(review): the tags presumably select which twiddle power fills each W slot -- confirm against the tw_instr definition. */ static const tw_instr twinstr[] = { + {TW_COS, 0, 1}, + {TW_SIN, 0, 1}, + {TW_COS, 0, 3}, + {TW_SIN, 0, 3}, + {TW_COS, 0, 7}, + {TW_SIN, 0, 7}, + 
{TW_NEXT, 1, 0} +}; + +/* Codelet descriptor: 8, the name "t2_8", the twiddle table above, and the counts {56, 26, 18, 0}, whose first three values repeat the additions / multiplications / fused multiply-add figures in the generated header comment. NOTE(review): the meaning of the leading 8 (presumably the transform size), &GENUS and the trailing zeros is set by ct_desc, which is not visible here. */ static const ct_desc desc = { 8, "t2_8", twinstr, {56, 26, 18, 0}, &GENUS, 0, 0, 0 }; + +/* Registration entry point: passes t2_8 and its descriptor to kdft_dit_register for the given planner. */ void X(codelet_t2_8) (planner *p) { + X(kdft_dit_register) (p, t2_8, &desc); +} -- cgit v1.2.3