diff options
Diffstat (limited to 'src/fftw3/rdft/codelets/hc2r/hb_64.c')
-rw-r--r-- | src/fftw3/rdft/codelets/hc2r/hb_64.c | 1972 |
1 files changed, 1972 insertions, 0 deletions
diff --git a/src/fftw3/rdft/codelets/hc2r/hb_64.c b/src/fftw3/rdft/codelets/hc2r/hb_64.c new file mode 100644 index 0000000..d3f9afc --- /dev/null +++ b/src/fftw3/rdft/codelets/hc2r/hb_64.c @@ -0,0 +1,1972 @@ +/* + * Copyright (c) 2003 Matteo Frigo + * Copyright (c) 2003 Massachusetts Institute of Technology + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* This file was automatically generated --- DO NOT EDIT */ +/* Generated on Sat Jul 5 22:11:42 EDT 2003 */ + +#include "codelet-rdft.h" + +/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2hc -compact -variables 4 -sign 1 -n 64 -dif -name hb_64 -include hb.h */ + +/* + * This function contains 1038 FP additions, 500 FP multiplications, + * (or, 808 additions, 270 multiplications, 230 fused multiply/add), + * 196 stack variables, and 256 memory accesses + */ +/* + * Generator Id's : + * $Id: hb_64.c,v 1.1 2008/10/17 06:12:08 scuri Exp $ + * $Id: hb_64.c,v 1.1 2008/10/17 06:12:08 scuri Exp $ + * $Id: hb_64.c,v 1.1 2008/10/17 06:12:08 scuri Exp $ + */ + +#include "hb.h" + +static const R *hb_64(R *rio, R *iio, const R *W, stride ios, int m, int dist) +{ + DK(KP634393284, +0.634393284163645498215171613225493370675687095); + DK(KP773010453, +0.773010453362736960810906609758469800971041293); + DK(KP098017140, 
+0.098017140329560601994195563888641845861136673); + DK(KP995184726, +0.995184726672196886244836953109479921575474869); + DK(KP471396736, +0.471396736825997648556387625905254377657460319); + DK(KP881921264, +0.881921264348355029712756863660388349508442621); + DK(KP290284677, +0.290284677254462367636192375817395274691476278); + DK(KP956940335, +0.956940335732208864935797886980269969482849206); + DK(KP195090322, +0.195090322016128267848284868477022240927691618); + DK(KP980785280, +0.980785280403230449126182236134239036973933731); + DK(KP555570233, +0.555570233019602224742830813948532874374937191); + DK(KP831469612, +0.831469612302545237078788377617905756738560812); + DK(KP382683432, +0.382683432365089771728459984030398866761344562); + DK(KP923879532, +0.923879532511286756128183189396788286822416626); + DK(KP707106781, +0.707106781186547524400844362104849039284835938); + int i; + for (i = m - 2; i > 0; i = i - 2, rio = rio + dist, iio = iio - dist, W = W + 126) { + E Tf, T7i, Tfa, ThM, Tgp, ThH, T2c, T5O, T4T, T6n, Tcp, Ted, TcA, TdE, T87; + E T9o, TK, T93, T2P, T4F, Tfo, Thz, T5T, T6j, Tbx, TdI, Tfl, ThA, T7r, T81; + E TbE, TdH, TZ, T94, T38, T4G, Tfv, ThC, T5W, T6k, TbQ, TdK, Tfs, ThD, T7w; + E T82, TbX, TdL, Tu, T84, Tfh, ThG, Tgm, ThN, T2v, T6m, T4K, T5P, Tce, TdF; + E TcD, Tec, T7l, T9p, T1L, T20, T9c, T9d, T9e, T9f, T40, T66, Tg1, Thu, Tg8; + E Thv, Tg5, Thr, T4n, T67, T4j, T69, T4w, T6a, TaT, TdW, Tb8, TdZ, TfU, Ths; + E T7O, T8y, T7T, T8z, Tbc, TdX, Tbj, Te0, T1g, T1v, T97, T98, T99, T9a, T3j; + E T5Z, TfI, Thk, TfP, Thl, TfM, Tho, T3G, T60, T3C, T62, T3P, T63, Tak, TdQ; + E Tav, TdT, TfB, Thn, T7D, T8v, T7I, T8w, TaD, TdP, TaG, TdS; + { + E T3, Tcm, T4O, Tcv, T6, Tcu, T4R, Tcn, Td, Tcy, T2a, Tch, Ta, Tcx, T27; + E Tck; + { + E T1, T2, T4P, T4Q; + T1 = rio[0]; + T2 = iio[-WS(ios, 32)]; + T3 = T1 + T2; + Tcm = T1 - T2; + { + E T4M, T4N, T4, T5; + T4M = iio[0]; + T4N = rio[WS(ios, 32)]; + T4O = T4M - T4N; + Tcv = T4M + T4N; + T4 = rio[WS(ios, 16)]; + T5 = 
iio[-WS(ios, 48)]; + T6 = T4 + T5; + Tcu = T4 - T5; + } + T4P = iio[-WS(ios, 16)]; + T4Q = rio[WS(ios, 48)]; + T4R = T4P - T4Q; + Tcn = T4P + T4Q; + { + E Tb, Tc, Tcf, T28, T29, Tcg; + Tb = iio[-WS(ios, 56)]; + Tc = rio[WS(ios, 24)]; + Tcf = Tb - Tc; + T28 = iio[-WS(ios, 24)]; + T29 = rio[WS(ios, 56)]; + Tcg = T29 + T28; + Td = Tb + Tc; + Tcy = Tcf + Tcg; + T2a = T28 - T29; + Tch = Tcf - Tcg; + } + { + E T8, T9, Tcj, T25, T26, Tci; + T8 = rio[WS(ios, 8)]; + T9 = iio[-WS(ios, 40)]; + Tcj = T8 - T9; + T25 = iio[-WS(ios, 8)]; + T26 = rio[WS(ios, 40)]; + Tci = T25 + T26; + Ta = T8 + T9; + Tcx = Tcj + Tci; + T27 = T25 - T26; + Tck = Tci - Tcj; + } + } + { + E T7, Te, Tf8, Tf9; + T7 = T3 + T6; + Te = Ta + Td; + Tf = T7 + Te; + T7i = T7 - Te; + Tf8 = Tcv - Tcu; + Tf9 = KP707106781 * (Tck + Tch); + Tfa = Tf8 + Tf9; + ThM = Tf8 - Tf9; + } + { + E Tgn, Tgo, T24, T2b; + Tgn = KP707106781 * (Tcx + Tcy); + Tgo = Tcm + Tcn; + Tgp = Tgn + Tgo; + ThH = Tgo - Tgn; + T24 = T3 - T6; + T2b = T27 - T2a; + T2c = T24 + T2b; + T5O = T24 - T2b; + } + { + E T4L, T4S, Tcl, Tco; + T4L = Td - Ta; + T4S = T4O - T4R; + T4T = T4L + T4S; + T6n = T4S - T4L; + Tcl = KP707106781 * (Tch - Tck); + Tco = Tcm - Tcn; + Tcp = Tcl + Tco; + Ted = Tco - Tcl; + } + { + E Tcw, Tcz, T85, T86; + Tcw = Tcu + Tcv; + Tcz = KP707106781 * (Tcx - Tcy); + TcA = Tcw + Tcz; + TdE = Tcw - Tcz; + T85 = T4O + T4R; + T86 = T27 + T2a; + T87 = T85 - T86; + T9o = T86 + T85; + } + } + { + E TC, Tby, T2x, Tbu, T2N, Tbz, T7o, Tbv, TJ, TbB, TbC, T2E, T2G, Tbp, Tbs; + E T7p, Tfj, Tfk; + { + E Tw, Tx, Ty, Tz, TA, TB; + Tw = rio[WS(ios, 2)]; + Tx = iio[-WS(ios, 34)]; + Ty = Tw + Tx; + Tz = rio[WS(ios, 18)]; + TA = iio[-WS(ios, 50)]; + TB = Tz + TA; + TC = Ty + TB; + Tby = Tz - TA; + T2x = Ty - TB; + Tbu = Tw - Tx; + } + { + E T2H, T2I, T2J, T2K, T2L, T2M; + T2H = iio[-WS(ios, 2)]; + T2I = rio[WS(ios, 34)]; + T2J = T2H - T2I; + T2K = iio[-WS(ios, 18)]; + T2L = rio[WS(ios, 50)]; + T2M = T2K - T2L; + T2N = T2J - T2M; + Tbz = T2H + T2I; + 
T7o = T2J + T2M; + Tbv = T2K + T2L; + } + { + E TF, Tbr, T2A, Tbq, TI, Tbn, T2D, Tbo; + { + E TD, TE, T2y, T2z; + TD = rio[WS(ios, 10)]; + TE = iio[-WS(ios, 42)]; + TF = TD + TE; + Tbr = TD - TE; + T2y = iio[-WS(ios, 10)]; + T2z = rio[WS(ios, 42)]; + T2A = T2y - T2z; + Tbq = T2y + T2z; + } + { + E TG, TH, T2B, T2C; + TG = iio[-WS(ios, 58)]; + TH = rio[WS(ios, 26)]; + TI = TG + TH; + Tbn = TG - TH; + T2B = iio[-WS(ios, 26)]; + T2C = rio[WS(ios, 58)]; + T2D = T2B - T2C; + Tbo = T2C + T2B; + } + TJ = TF + TI; + TbB = Tbr + Tbq; + TbC = Tbn + Tbo; + T2E = T2A - T2D; + T2G = TI - TF; + Tbp = Tbn - Tbo; + Tbs = Tbq - Tbr; + T7p = T2A + T2D; + } + TK = TC + TJ; + T93 = T7p + T7o; + { + E T2F, T2O, Tfm, Tfn; + T2F = T2x + T2E; + T2O = T2G + T2N; + T2P = FMA(KP923879532, T2F, KP382683432 * T2O); + T4F = FNMS(KP382683432, T2F, KP923879532 * T2O); + Tfm = KP707106781 * (TbB + TbC); + Tfn = Tbu + Tbv; + Tfo = Tfm + Tfn; + Thz = Tfn - Tfm; + } + { + E T5R, T5S, Tbt, Tbw; + T5R = T2x - T2E; + T5S = T2N - T2G; + T5T = FNMS(KP382683432, T5S, KP923879532 * T5R); + T6j = FMA(KP382683432, T5R, KP923879532 * T5S); + Tbt = KP707106781 * (Tbp - Tbs); + Tbw = Tbu - Tbv; + Tbx = Tbt + Tbw; + TdI = Tbw - Tbt; + } + Tfj = Tbz - Tby; + Tfk = KP707106781 * (Tbs + Tbp); + Tfl = Tfj + Tfk; + ThA = Tfj - Tfk; + { + E T7n, T7q, TbA, TbD; + T7n = TC - TJ; + T7q = T7o - T7p; + T7r = T7n + T7q; + T81 = T7q - T7n; + TbA = Tby + Tbz; + TbD = KP707106781 * (TbB - TbC); + TbE = TbA + TbD; + TdH = TbA - TbD; + } + } + { + E TR, TbU, T2Q, TbN, T36, TbV, T7t, TbO, TY, TbR, TbS, T2X, T2Z, TbI, TbL; + E T7u, Tfq, Tfr; + { + E TL, TM, TN, TO, TP, TQ; + TL = iio[-WS(ios, 62)]; + TM = rio[WS(ios, 30)]; + TN = TL + TM; + TO = rio[WS(ios, 14)]; + TP = iio[-WS(ios, 46)]; + TQ = TO + TP; + TR = TN + TQ; + TbU = TL - TM; + T2Q = TN - TQ; + TbN = TO - TP; + } + { + E T30, T31, T32, T33, T34, T35; + T30 = iio[-WS(ios, 30)]; + T31 = rio[WS(ios, 62)]; + T32 = T30 - T31; + T33 = iio[-WS(ios, 14)]; + T34 = rio[WS(ios, 
46)]; + T35 = T33 - T34; + T36 = T32 - T35; + TbV = T33 + T34; + T7t = T32 + T35; + TbO = T31 + T30; + } + { + E TU, TbG, T2T, TbH, TX, TbJ, T2W, TbK; + { + E TS, TT, T2R, T2S; + TS = rio[WS(ios, 6)]; + TT = iio[-WS(ios, 38)]; + TU = TS + TT; + TbG = TS - TT; + T2R = iio[-WS(ios, 6)]; + T2S = rio[WS(ios, 38)]; + T2T = T2R - T2S; + TbH = T2R + T2S; + } + { + E TV, TW, T2U, T2V; + TV = iio[-WS(ios, 54)]; + TW = rio[WS(ios, 22)]; + TX = TV + TW; + TbJ = TV - TW; + T2U = iio[-WS(ios, 22)]; + T2V = rio[WS(ios, 54)]; + T2W = T2U - T2V; + TbK = T2V + T2U; + } + TY = TU + TX; + TbR = TbJ - TbK; + TbS = TbH - TbG; + T2X = T2T - T2W; + T2Z = TX - TU; + TbI = TbG + TbH; + TbL = TbJ + TbK; + T7u = T2T + T2W; + } + TZ = TR + TY; + T94 = T7u + T7t; + { + E T2Y, T37, Tft, Tfu; + T2Y = T2Q + T2X; + T37 = T2Z + T36; + T38 = FNMS(KP382683432, T37, KP923879532 * T2Y); + T4G = FMA(KP382683432, T2Y, KP923879532 * T37); + Tft = KP707106781 * (TbI + TbL); + Tfu = TbU + TbV; + Tfv = Tft + Tfu; + ThC = Tfu - Tft; + } + { + E T5U, T5V, TbM, TbP; + T5U = T2Q - T2X; + T5V = T36 - T2Z; + T5W = FMA(KP923879532, T5U, KP382683432 * T5V); + T6k = FNMS(KP382683432, T5U, KP923879532 * T5V); + TbM = KP707106781 * (TbI - TbL); + TbP = TbN - TbO; + TbQ = TbM + TbP; + TdK = TbP - TbM; + } + Tfq = KP707106781 * (TbS + TbR); + Tfr = TbN + TbO; + Tfs = Tfq - Tfr; + ThD = Tfq + Tfr; + { + E T7s, T7v, TbT, TbW; + T7s = TR - TY; + T7v = T7t - T7u; + T7w = T7s - T7v; + T82 = T7s + T7v; + TbT = KP707106781 * (TbR - TbS); + TbW = TbU - TbV; + TbX = TbT + TbW; + TdL = TbW - TbT; + } + } + { + E Ti, T2g, Tl, T2j, T2d, T2k, Tfc, Tfb, Tc5, Tc2, Tp, T2p, Ts, T2s, T2m; + E T2t, Tff, Tfe, Tcc, Tc9; + { + E Tc0, Tc4, Tc3, Tc1; + { + E Tg, Th, T2e, T2f; + Tg = rio[WS(ios, 4)]; + Th = iio[-WS(ios, 36)]; + Ti = Tg + Th; + Tc0 = Tg - Th; + T2e = iio[-WS(ios, 4)]; + T2f = rio[WS(ios, 36)]; + T2g = T2e - T2f; + Tc4 = T2e + T2f; + } + { + E Tj, Tk, T2h, T2i; + Tj = rio[WS(ios, 20)]; + Tk = iio[-WS(ios, 52)]; + Tl = Tj + Tk; + 
Tc3 = Tj - Tk; + T2h = iio[-WS(ios, 20)]; + T2i = rio[WS(ios, 52)]; + T2j = T2h - T2i; + Tc1 = T2h + T2i; + } + T2d = Ti - Tl; + T2k = T2g - T2j; + Tfc = Tc0 + Tc1; + Tfb = Tc4 - Tc3; + Tc5 = Tc3 + Tc4; + Tc2 = Tc0 - Tc1; + } + { + E Tc7, Tcb, Tca, Tc8; + { + E Tn, To, T2n, T2o; + Tn = iio[-WS(ios, 60)]; + To = rio[WS(ios, 28)]; + Tp = Tn + To; + Tc7 = Tn - To; + T2n = iio[-WS(ios, 28)]; + T2o = rio[WS(ios, 60)]; + T2p = T2n - T2o; + Tcb = T2o + T2n; + } + { + E Tq, Tr, T2q, T2r; + Tq = rio[WS(ios, 12)]; + Tr = iio[-WS(ios, 44)]; + Ts = Tq + Tr; + Tca = Tq - Tr; + T2q = iio[-WS(ios, 12)]; + T2r = rio[WS(ios, 44)]; + T2s = T2q - T2r; + Tc8 = T2q + T2r; + } + T2m = Tp - Ts; + T2t = T2p - T2s; + Tff = Tca + Tcb; + Tfe = Tc7 + Tc8; + Tcc = Tca - Tcb; + Tc9 = Tc7 - Tc8; + } + { + E Tm, Tt, Tfd, Tfg; + Tm = Ti + Tl; + Tt = Tp + Ts; + Tu = Tm + Tt; + T84 = Tt - Tm; + Tfd = FNMS(KP382683432, Tfc, KP923879532 * Tfb); + Tfg = FNMS(KP923879532, Tff, KP382683432 * Tfe); + Tfh = Tfd + Tfg; + ThG = Tfg - Tfd; + } + { + E Tgk, Tgl, T2l, T2u; + Tgk = FMA(KP382683432, Tfb, KP923879532 * Tfc); + Tgl = FMA(KP923879532, Tfe, KP382683432 * Tff); + Tgm = Tgk + Tgl; + ThN = Tgk - Tgl; + T2l = T2d + T2k; + T2u = T2m - T2t; + T2v = KP707106781 * (T2l + T2u); + T6m = KP707106781 * (T2l - T2u); + } + { + E T4I, T4J, Tc6, Tcd; + T4I = T2k - T2d; + T4J = T2m + T2t; + T4K = KP707106781 * (T4I + T4J); + T5P = KP707106781 * (T4J - T4I); + Tc6 = FNMS(KP382683432, Tc5, KP923879532 * Tc2); + Tcd = FMA(KP923879532, Tc9, KP382683432 * Tcc); + Tce = Tc6 + Tcd; + TdF = Tcd - Tc6; + } + { + E TcB, TcC, T7j, T7k; + TcB = FMA(KP923879532, Tc5, KP382683432 * Tc2); + TcC = FNMS(KP382683432, Tc9, KP923879532 * Tcc); + TcD = TcB + TcC; + Tec = TcB - TcC; + T7j = T2g + T2j; + T7k = T2s + T2p; + T7l = T7j - T7k; + T9p = T7j + T7k; + } + } + { + E T1z, T1C, T1D, Tbg, TaQ, T4r, T4u, T7Q, Tbh, TaR, T1G, T3V, T1J, T3Y, T1K; + E T7R, Tbe, Tbd, TaO, TaL, T1S, TfV, TfW, T41, T48, TaW, TaZ, T7L, T1Z, TfY; + E TfZ, T4a, 
T4h, Tb3, Tb6, T7M; + { + E T1x, T1y, T1A, T1B; + T1x = iio[-WS(ios, 63)]; + T1y = rio[WS(ios, 31)]; + T1z = T1x + T1y; + T1A = rio[WS(ios, 15)]; + T1B = iio[-WS(ios, 47)]; + T1C = T1A + T1B; + T1D = T1z + T1C; + Tbg = T1x - T1y; + TaQ = T1A - T1B; + } + { + E T4p, T4q, T4s, T4t; + T4p = iio[-WS(ios, 31)]; + T4q = rio[WS(ios, 63)]; + T4r = T4p - T4q; + T4s = iio[-WS(ios, 15)]; + T4t = rio[WS(ios, 47)]; + T4u = T4s - T4t; + T7Q = T4r + T4u; + Tbh = T4s + T4t; + TaR = T4q + T4p; + } + { + E TaJ, TaK, TaM, TaN; + { + E T1E, T1F, T3T, T3U; + T1E = rio[WS(ios, 7)]; + T1F = iio[-WS(ios, 39)]; + T1G = T1E + T1F; + TaJ = T1E - T1F; + T3T = iio[-WS(ios, 7)]; + T3U = rio[WS(ios, 39)]; + T3V = T3T - T3U; + TaK = T3T + T3U; + } + { + E T1H, T1I, T3W, T3X; + T1H = iio[-WS(ios, 55)]; + T1I = rio[WS(ios, 23)]; + T1J = T1H + T1I; + TaM = T1H - T1I; + T3W = iio[-WS(ios, 23)]; + T3X = rio[WS(ios, 55)]; + T3Y = T3W - T3X; + TaN = T3X + T3W; + } + T1K = T1G + T1J; + T7R = T3V + T3Y; + Tbe = TaK - TaJ; + Tbd = TaM - TaN; + TaO = TaM + TaN; + TaL = TaJ + TaK; + } + { + E T1O, TaX, T44, TaV, T1R, TaU, T47, TaY; + { + E T1M, T1N, T42, T43; + T1M = rio[WS(ios, 3)]; + T1N = iio[-WS(ios, 35)]; + T1O = T1M + T1N; + TaX = T1M - T1N; + T42 = iio[-WS(ios, 3)]; + T43 = rio[WS(ios, 35)]; + T44 = T42 - T43; + TaV = T42 + T43; + } + { + E T1P, T1Q, T45, T46; + T1P = rio[WS(ios, 19)]; + T1Q = iio[-WS(ios, 51)]; + T1R = T1P + T1Q; + TaU = T1P - T1Q; + T45 = iio[-WS(ios, 19)]; + T46 = rio[WS(ios, 51)]; + T47 = T45 - T46; + TaY = T45 + T46; + } + T1S = T1O + T1R; + TfV = TaV - TaU; + TfW = TaX + TaY; + T41 = T1O - T1R; + T48 = T44 - T47; + TaW = TaU + TaV; + TaZ = TaX - TaY; + T7L = T44 + T47; + } + { + E T1V, Tb4, T4d, Tb2, T1Y, Tb1, T4g, Tb5; + { + E T1T, T1U, T4b, T4c; + T1T = iio[-WS(ios, 59)]; + T1U = rio[WS(ios, 27)]; + T1V = T1T + T1U; + Tb4 = T1T - T1U; + T4b = iio[-WS(ios, 27)]; + T4c = rio[WS(ios, 59)]; + T4d = T4b - T4c; + Tb2 = T4c + T4b; + } + { + E T1W, T1X, T4e, T4f; + T1W = rio[WS(ios, 
11)]; + T1X = iio[-WS(ios, 43)]; + T1Y = T1W + T1X; + Tb1 = T1W - T1X; + T4e = iio[-WS(ios, 11)]; + T4f = rio[WS(ios, 43)]; + T4g = T4e - T4f; + Tb5 = T4e + T4f; + } + T1Z = T1V + T1Y; + TfY = Tb4 + Tb5; + TfZ = Tb1 + Tb2; + T4a = T1V - T1Y; + T4h = T4d - T4g; + Tb3 = Tb1 - Tb2; + Tb6 = Tb4 - Tb5; + T7M = T4g + T4d; + } + T1L = T1D + T1K; + T20 = T1S + T1Z; + T9c = T1L - T20; + T9d = T7R + T7Q; + T9e = T7L + T7M; + T9f = T9d - T9e; + { + E T3S, T3Z, TfX, Tg0; + T3S = T1z - T1C; + T3Z = T3V - T3Y; + T40 = T3S + T3Z; + T66 = T3S - T3Z; + TfX = FNMS(KP382683432, TfW, KP923879532 * TfV); + Tg0 = FNMS(KP923879532, TfZ, KP382683432 * TfY); + Tg1 = TfX + Tg0; + Thu = Tg0 - TfX; + } + { + E Tg6, Tg7, Tg3, Tg4; + Tg6 = KP707106781 * (TaL + TaO); + Tg7 = Tbg + Tbh; + Tg8 = Tg6 + Tg7; + Thv = Tg7 - Tg6; + Tg3 = FMA(KP382683432, TfV, KP923879532 * TfW); + Tg4 = FMA(KP923879532, TfY, KP382683432 * TfZ); + Tg5 = Tg3 + Tg4; + Thr = Tg3 - Tg4; + } + { + E T4l, T4m, T49, T4i; + T4l = T48 - T41; + T4m = T4a + T4h; + T4n = KP707106781 * (T4l + T4m); + T67 = KP707106781 * (T4m - T4l); + T49 = T41 + T48; + T4i = T4a - T4h; + T4j = KP707106781 * (T49 + T4i); + T69 = KP707106781 * (T49 - T4i); + } + { + E T4o, T4v, TaP, TaS; + T4o = T1J - T1G; + T4v = T4r - T4u; + T4w = T4o + T4v; + T6a = T4v - T4o; + TaP = KP707106781 * (TaL - TaO); + TaS = TaQ - TaR; + TaT = TaP + TaS; + TdW = TaS - TaP; + } + { + E Tb0, Tb7, TfS, TfT; + Tb0 = FMA(KP923879532, TaW, KP382683432 * TaZ); + Tb7 = FNMS(KP382683432, Tb6, KP923879532 * Tb3); + Tb8 = Tb0 + Tb7; + TdZ = Tb0 - Tb7; + TfS = KP707106781 * (Tbe + Tbd); + TfT = TaQ + TaR; + TfU = TfS - TfT; + Ths = TfS + TfT; + } + { + E T7K, T7N, T7P, T7S; + T7K = T1D - T1K; + T7N = T7L - T7M; + T7O = T7K + T7N; + T8y = T7K - T7N; + T7P = T1Z - T1S; + T7S = T7Q - T7R; + T7T = T7P + T7S; + T8z = T7S - T7P; + } + { + E Tba, Tbb, Tbf, Tbi; + Tba = FNMS(KP382683432, TaW, KP923879532 * TaZ); + Tbb = FMA(KP923879532, Tb6, KP382683432 * Tb3); + Tbc = Tba + Tbb; + TdX = 
Tbb - Tba; + Tbf = KP707106781 * (Tbd - Tbe); + Tbi = Tbg - Tbh; + Tbj = Tbf + Tbi; + Te0 = Tbi - Tbf; + } + } + { + E T14, T17, T18, Tax, Tas, T3K, T3N, T7F, Tay, Tat, T1b, T3e, T1e, T3h, T1f; + E T7G, TaB, TaA, Taq, Tan, T1n, TfC, TfD, T3k, T3r, Ta8, Tab, T7A, T1u, TfF; + E TfG, T3t, T3A, Taf, Tai, T7B; + { + E T12, T13, T15, T16; + T12 = rio[WS(ios, 1)]; + T13 = iio[-WS(ios, 33)]; + T14 = T12 + T13; + T15 = rio[WS(ios, 17)]; + T16 = iio[-WS(ios, 49)]; + T17 = T15 + T16; + T18 = T14 + T17; + Tax = T15 - T16; + Tas = T12 - T13; + } + { + E T3I, T3J, T3L, T3M; + T3I = iio[-WS(ios, 1)]; + T3J = rio[WS(ios, 33)]; + T3K = T3I - T3J; + T3L = iio[-WS(ios, 17)]; + T3M = rio[WS(ios, 49)]; + T3N = T3L - T3M; + T7F = T3K + T3N; + Tay = T3I + T3J; + Tat = T3L + T3M; + } + { + E Tap, Tao, Tal, Tam; + { + E T19, T1a, T3c, T3d; + T19 = rio[WS(ios, 9)]; + T1a = iio[-WS(ios, 41)]; + T1b = T19 + T1a; + Tap = T19 - T1a; + T3c = iio[-WS(ios, 9)]; + T3d = rio[WS(ios, 41)]; + T3e = T3c - T3d; + Tao = T3c + T3d; + } + { + E T1c, T1d, T3f, T3g; + T1c = iio[-WS(ios, 57)]; + T1d = rio[WS(ios, 25)]; + T1e = T1c + T1d; + Tal = T1c - T1d; + T3f = iio[-WS(ios, 25)]; + T3g = rio[WS(ios, 57)]; + T3h = T3f - T3g; + Tam = T3g + T3f; + } + T1f = T1b + T1e; + T7G = T3e + T3h; + TaB = Tal + Tam; + TaA = Tap + Tao; + Taq = Tao - Tap; + Tan = Tal - Tam; + } + { + E T1j, Ta6, T3n, Taa, T1m, Ta9, T3q, Ta7; + { + E T1h, T1i, T3l, T3m; + T1h = rio[WS(ios, 5)]; + T1i = iio[-WS(ios, 37)]; + T1j = T1h + T1i; + Ta6 = T1h - T1i; + T3l = iio[-WS(ios, 5)]; + T3m = rio[WS(ios, 37)]; + T3n = T3l - T3m; + Taa = T3l + T3m; + } + { + E T1k, T1l, T3o, T3p; + T1k = rio[WS(ios, 21)]; + T1l = iio[-WS(ios, 53)]; + T1m = T1k + T1l; + Ta9 = T1k - T1l; + T3o = iio[-WS(ios, 21)]; + T3p = rio[WS(ios, 53)]; + T3q = T3o - T3p; + Ta7 = T3o + T3p; + } + T1n = T1j + T1m; + TfC = Taa - Ta9; + TfD = Ta6 + Ta7; + T3k = T1j - T1m; + T3r = T3n - T3q; + Ta8 = Ta6 - Ta7; + Tab = Ta9 + Taa; + T7A = T3n + T3q; + } + { + E T1q, Tad, T3w, 
Tah, T1t, Tag, T3z, Tae; + { + E T1o, T1p, T3u, T3v; + T1o = iio[-WS(ios, 61)]; + T1p = rio[WS(ios, 29)]; + T1q = T1o + T1p; + Tad = T1o - T1p; + T3u = iio[-WS(ios, 29)]; + T3v = rio[WS(ios, 61)]; + T3w = T3u - T3v; + Tah = T3v + T3u; + } + { + E T1r, T1s, T3x, T3y; + T1r = rio[WS(ios, 13)]; + T1s = iio[-WS(ios, 45)]; + T1t = T1r + T1s; + Tag = T1r - T1s; + T3x = iio[-WS(ios, 13)]; + T3y = rio[WS(ios, 45)]; + T3z = T3x - T3y; + Tae = T3x + T3y; + } + T1u = T1q + T1t; + TfF = Tad + Tae; + TfG = Tag + Tah; + T3t = T1q - T1t; + T3A = T3w - T3z; + Taf = Tad - Tae; + Tai = Tag - Tah; + T7B = T3z + T3w; + } + T1g = T18 + T1f; + T1v = T1n + T1u; + T97 = T1g - T1v; + T98 = T7G + T7F; + T99 = T7A + T7B; + T9a = T98 - T99; + { + E T3b, T3i, TfE, TfH; + T3b = T14 - T17; + T3i = T3e - T3h; + T3j = T3b + T3i; + T5Z = T3b - T3i; + TfE = FNMS(KP382683432, TfD, KP923879532 * TfC); + TfH = FNMS(KP923879532, TfG, KP382683432 * TfF); + TfI = TfE + TfH; + Thk = TfH - TfE; + } + { + E TfN, TfO, TfK, TfL; + TfN = KP707106781 * (TaA + TaB); + TfO = Tas + Tat; + TfP = TfN + TfO; + Thl = TfO - TfN; + TfK = FMA(KP382683432, TfC, KP923879532 * TfD); + TfL = FMA(KP923879532, TfF, KP382683432 * TfG); + TfM = TfK + TfL; + Tho = TfK - TfL; + } + { + E T3E, T3F, T3s, T3B; + T3E = T3r - T3k; + T3F = T3t + T3A; + T3G = KP707106781 * (T3E + T3F); + T60 = KP707106781 * (T3F - T3E); + T3s = T3k + T3r; + T3B = T3t - T3A; + T3C = KP707106781 * (T3s + T3B); + T62 = KP707106781 * (T3s - T3B); + } + { + E T3H, T3O, Tac, Taj; + T3H = T1e - T1b; + T3O = T3K - T3N; + T3P = T3H + T3O; + T63 = T3O - T3H; + Tac = FNMS(KP382683432, Tab, KP923879532 * Ta8); + Taj = FMA(KP923879532, Taf, KP382683432 * Tai); + Tak = Tac + Taj; + TdQ = Taj - Tac; + } + { + E Tar, Tau, Tfz, TfA; + Tar = KP707106781 * (Tan - Taq); + Tau = Tas - Tat; + Tav = Tar + Tau; + TdT = Tau - Tar; + Tfz = Tay - Tax; + TfA = KP707106781 * (Taq + Tan); + TfB = Tfz + TfA; + Thn = Tfz - TfA; + } + { + E T7z, T7C, T7E, T7H; + T7z = T18 - T1f; + T7C = 
T7A - T7B; + T7D = T7z + T7C; + T8v = T7z - T7C; + T7E = T1u - T1n; + T7H = T7F - T7G; + T7I = T7E + T7H; + T8w = T7H - T7E; + } + { + E Taz, TaC, TaE, TaF; + Taz = Tax + Tay; + TaC = KP707106781 * (TaA - TaB); + TaD = Taz + TaC; + TdP = Taz - TaC; + TaE = FMA(KP923879532, Tab, KP382683432 * Ta8); + TaF = FNMS(KP382683432, Taf, KP923879532 * Tai); + TaG = TaE + TaF; + TdS = TaE - TaF; + } + } + { + E T11, T9K, T9T, Ta2, T22, T9Q, T9N, Ta3; + { + E Tv, T10, T9R, T9S; + Tv = Tf + Tu; + T10 = TK + TZ; + T11 = Tv + T10; + T9K = Tv - T10; + T9R = T9p + T9o; + T9S = T93 + T94; + T9T = T9R - T9S; + Ta2 = T9S + T9R; + } + { + E T1w, T21, T9L, T9M; + T1w = T1g + T1v; + T21 = T1L + T20; + T22 = T1w + T21; + T9Q = T21 - T1w; + T9L = T99 + T98; + T9M = T9e + T9d; + T9N = T9L - T9M; + Ta3 = T9L + T9M; + } + rio[0] = T11 + T22; + iio[-WS(ios, 63)] = Ta3 + Ta2; + { + E T9O, T9U, T9J, T9P; + T9O = T9K + T9N; + T9U = T9Q + T9T; + T9J = W[94]; + T9P = W[95]; + rio[WS(ios, 48)] = FNMS(T9P, T9U, T9J * T9O); + iio[-WS(ios, 15)] = FMA(T9P, T9O, T9J * T9U); + } + { + E T9W, T9Y, T9V, T9X; + T9W = T9K - T9N; + T9Y = T9T - T9Q; + T9V = W[30]; + T9X = W[31]; + rio[WS(ios, 16)] = FNMS(T9X, T9Y, T9V * T9W); + iio[-WS(ios, 47)] = FMA(T9X, T9W, T9V * T9Y); + } + { + E Ta0, Ta4, T9Z, Ta1; + Ta0 = T11 - T22; + Ta4 = Ta2 - Ta3; + T9Z = W[62]; + Ta1 = W[63]; + rio[WS(ios, 32)] = FNMS(Ta1, Ta4, T9Z * Ta0); + iio[-WS(ios, 31)] = FMA(Ta1, Ta0, T9Z * Ta4); + } + } + { + E T96, T9y, T9r, T9D, T9h, T9C, T9m, T9z; + { + E T92, T95, T9n, T9q; + T92 = Tf - Tu; + T95 = T93 - T94; + T96 = T92 + T95; + T9y = T92 - T95; + T9n = TZ - TK; + T9q = T9o - T9p; + T9r = T9n + T9q; + T9D = T9q - T9n; + } + { + E T9b, T9g, T9k, T9l; + T9b = T97 + T9a; + T9g = T9c - T9f; + T9h = KP707106781 * (T9b + T9g); + T9C = KP707106781 * (T9b - T9g); + T9k = T9a - T97; + T9l = T9c + T9f; + T9m = KP707106781 * (T9k + T9l); + T9z = KP707106781 * (T9l - T9k); + } + { + E T9i, T9s, T91, T9j; + T9i = T96 + T9h; + T9s = T9m + T9r; + T91 
= W[110]; + T9j = W[111]; + rio[WS(ios, 56)] = FNMS(T9j, T9s, T91 * T9i); + iio[-WS(ios, 7)] = FMA(T9j, T9i, T91 * T9s); + } + { + E T9G, T9I, T9F, T9H; + T9G = T9y - T9z; + T9I = T9D - T9C; + T9F = W[78]; + T9H = W[79]; + rio[WS(ios, 40)] = FNMS(T9H, T9I, T9F * T9G); + iio[-WS(ios, 23)] = FMA(T9H, T9G, T9F * T9I); + } + { + E T9u, T9w, T9t, T9v; + T9u = T96 - T9h; + T9w = T9r - T9m; + T9t = W[46]; + T9v = W[47]; + rio[WS(ios, 24)] = FNMS(T9v, T9w, T9t * T9u); + iio[-WS(ios, 39)] = FMA(T9v, T9u, T9t * T9w); + } + { + E T9A, T9E, T9x, T9B; + T9A = T9y + T9z; + T9E = T9C + T9D; + T9x = W[14]; + T9B = W[15]; + rio[WS(ios, 8)] = FNMS(T9B, T9E, T9x * T9A); + iio[-WS(ios, 55)] = FMA(T9B, T9A, T9x * T9E); + } + } + { + E T8u, T8Q, T8J, T8V, T8B, T8U, T8G, T8R; + { + E T8s, T8t, T8H, T8I; + T8s = T7i - T7l; + T8t = KP707106781 * (T82 - T81); + T8u = T8s + T8t; + T8Q = T8s - T8t; + T8H = KP707106781 * (T7r - T7w); + T8I = T87 - T84; + T8J = T8H + T8I; + T8V = T8I - T8H; + } + { + E T8x, T8A, T8E, T8F; + T8x = FNMS(KP382683432, T8w, KP923879532 * T8v); + T8A = FMA(KP923879532, T8y, KP382683432 * T8z); + T8B = T8x + T8A; + T8U = T8A - T8x; + T8E = FMA(KP382683432, T8v, KP923879532 * T8w); + T8F = FNMS(KP382683432, T8y, KP923879532 * T8z); + T8G = T8E + T8F; + T8R = T8E - T8F; + } + { + E T8C, T8K, T8r, T8D; + T8C = T8u + T8B; + T8K = T8G + T8J; + T8r = W[6]; + T8D = W[7]; + rio[WS(ios, 4)] = FNMS(T8D, T8K, T8r * T8C); + iio[-WS(ios, 59)] = FMA(T8D, T8C, T8r * T8K); + } + { + E T8Y, T90, T8X, T8Z; + T8Y = T8Q - T8R; + T90 = T8V - T8U; + T8X = W[38]; + T8Z = W[39]; + rio[WS(ios, 20)] = FNMS(T8Z, T90, T8X * T8Y); + iio[-WS(ios, 43)] = FMA(T8Z, T8Y, T8X * T90); + } + { + E T8M, T8O, T8L, T8N; + T8M = T8u - T8B; + T8O = T8J - T8G; + T8L = W[70]; + T8N = W[71]; + rio[WS(ios, 36)] = FNMS(T8N, T8O, T8L * T8M); + iio[-WS(ios, 27)] = FMA(T8N, T8M, T8L * T8O); + } + { + E T8S, T8W, T8P, T8T; + T8S = T8Q + T8R; + T8W = T8U + T8V; + T8P = W[102]; + T8T = W[103]; + rio[WS(ios, 52)] = 
FNMS(T8T, T8W, T8P * T8S); + iio[-WS(ios, 11)] = FMA(T8T, T8S, T8P * T8W); + } + } + { + E T7y, T8g, T89, T8l, T7V, T8k, T80, T8h; + { + E T7m, T7x, T83, T88; + T7m = T7i + T7l; + T7x = KP707106781 * (T7r + T7w); + T7y = T7m + T7x; + T8g = T7m - T7x; + T83 = KP707106781 * (T81 + T82); + T88 = T84 + T87; + T89 = T83 + T88; + T8l = T88 - T83; + } + { + E T7J, T7U, T7Y, T7Z; + T7J = FMA(KP923879532, T7D, KP382683432 * T7I); + T7U = FNMS(KP382683432, T7T, KP923879532 * T7O); + T7V = T7J + T7U; + T8k = T7J - T7U; + T7Y = FNMS(KP382683432, T7D, KP923879532 * T7I); + T7Z = FMA(KP382683432, T7O, KP923879532 * T7T); + T80 = T7Y + T7Z; + T8h = T7Z - T7Y; + } + { + E T7W, T8a, T7h, T7X; + T7W = T7y + T7V; + T8a = T80 + T89; + T7h = W[118]; + T7X = W[119]; + rio[WS(ios, 60)] = FNMS(T7X, T8a, T7h * T7W); + iio[-WS(ios, 3)] = FMA(T7X, T7W, T7h * T8a); + } + { + E T8o, T8q, T8n, T8p; + T8o = T8g - T8h; + T8q = T8l - T8k; + T8n = W[86]; + T8p = W[87]; + rio[WS(ios, 44)] = FNMS(T8p, T8q, T8n * T8o); + iio[-WS(ios, 19)] = FMA(T8p, T8o, T8n * T8q); + } + { + E T8c, T8e, T8b, T8d; + T8c = T7y - T7V; + T8e = T89 - T80; + T8b = W[54]; + T8d = W[55]; + rio[WS(ios, 28)] = FNMS(T8d, T8e, T8b * T8c); + iio[-WS(ios, 35)] = FMA(T8d, T8c, T8b * T8e); + } + { + E T8i, T8m, T8f, T8j; + T8i = T8g + T8h; + T8m = T8k + T8l; + T8f = W[22]; + T8j = W[23]; + rio[WS(ios, 12)] = FNMS(T8j, T8m, T8f * T8i); + iio[-WS(ios, 51)] = FMA(T8j, T8i, T8f * T8m); + } + } + { + E T6K, T76, T6Z, T7b, T6R, T7a, T6W, T77; + { + E T6I, T6J, T6X, T6Y; + T6I = T5O - T5P; + T6J = T6j - T6k; + T6K = T6I + T6J; + T76 = T6I - T6J; + T6X = T5W - T5T; + T6Y = T6n - T6m; + T6Z = T6X + T6Y; + T7b = T6Y - T6X; + { + E T6N, T6U, T6Q, T6V; + { + E T6L, T6M, T6O, T6P; + T6L = T5Z - T60; + T6M = T63 - T62; + T6N = FMA(KP831469612, T6L, KP555570233 * T6M); + T6U = FNMS(KP555570233, T6L, KP831469612 * T6M); + T6O = T66 - T67; + T6P = T6a - T69; + T6Q = FNMS(KP555570233, T6P, KP831469612 * T6O); + T6V = FMA(KP555570233, T6O, KP831469612 
* T6P); + } + T6R = T6N + T6Q; + T7a = T6N - T6Q; + T6W = T6U + T6V; + T77 = T6V - T6U; + } + } + { + E T6S, T70, T6H, T6T; + T6S = T6K + T6R; + T70 = T6W + T6Z; + T6H = W[114]; + T6T = W[115]; + rio[WS(ios, 58)] = FNMS(T6T, T70, T6H * T6S); + iio[-WS(ios, 5)] = FMA(T6T, T6S, T6H * T70); + } + { + E T7e, T7g, T7d, T7f; + T7e = T76 - T77; + T7g = T7b - T7a; + T7d = W[82]; + T7f = W[83]; + rio[WS(ios, 42)] = FNMS(T7f, T7g, T7d * T7e); + iio[-WS(ios, 21)] = FMA(T7f, T7e, T7d * T7g); + } + { + E T72, T74, T71, T73; + T72 = T6K - T6R; + T74 = T6Z - T6W; + T71 = W[50]; + T73 = W[51]; + rio[WS(ios, 26)] = FNMS(T73, T74, T71 * T72); + iio[-WS(ios, 37)] = FMA(T73, T72, T71 * T74); + } + { + E T78, T7c, T75, T79; + T78 = T76 + T77; + T7c = T7a + T7b; + T75 = W[18]; + T79 = W[19]; + rio[WS(ios, 10)] = FNMS(T79, T7c, T75 * T78); + iio[-WS(ios, 53)] = FMA(T79, T78, T75 * T7c); + } + } + { + E T3a, T52, T4V, T57, T4z, T56, T4E, T53; + { + E T2w, T39, T4H, T4U; + T2w = T2c + T2v; + T39 = T2P + T38; + T3a = T2w + T39; + T52 = T2w - T39; + T4H = T4F + T4G; + T4U = T4K + T4T; + T4V = T4H + T4U; + T57 = T4U - T4H; + { + E T3R, T4C, T4y, T4D; + { + E T3D, T3Q, T4k, T4x; + T3D = T3j + T3C; + T3Q = T3G + T3P; + T3R = FMA(KP980785280, T3D, KP195090322 * T3Q); + T4C = FNMS(KP195090322, T3D, KP980785280 * T3Q); + T4k = T40 + T4j; + T4x = T4n + T4w; + T4y = FNMS(KP195090322, T4x, KP980785280 * T4k); + T4D = FMA(KP195090322, T4k, KP980785280 * T4x); + } + T4z = T3R + T4y; + T56 = T3R - T4y; + T4E = T4C + T4D; + T53 = T4D - T4C; + } + } + { + E T4A, T4W, T23, T4B; + T4A = T3a + T4z; + T4W = T4E + T4V; + T23 = W[122]; + T4B = W[123]; + rio[WS(ios, 62)] = FNMS(T4B, T4W, T23 * T4A); + iio[-WS(ios, 1)] = FMA(T4B, T4A, T23 * T4W); + } + { + E T5a, T5c, T59, T5b; + T5a = T52 - T53; + T5c = T57 - T56; + T59 = W[90]; + T5b = W[91]; + rio[WS(ios, 46)] = FNMS(T5b, T5c, T59 * T5a); + iio[-WS(ios, 17)] = FMA(T5b, T5a, T59 * T5c); + } + { + E T4Y, T50, T4X, T4Z; + T4Y = T3a - T4z; + T50 = T4V - T4E; + T4X 
= W[58]; + T4Z = W[59]; + rio[WS(ios, 30)] = FNMS(T4Z, T50, T4X * T4Y); + iio[-WS(ios, 33)] = FMA(T4Z, T4Y, T4X * T50); + } + { + E T54, T58, T51, T55; + T54 = T52 + T53; + T58 = T56 + T57; + T51 = W[26]; + T55 = W[27]; + rio[WS(ios, 14)] = FNMS(T55, T58, T51 * T54); + iio[-WS(ios, 49)] = FMA(T55, T54, T51 * T58); + } + } + { + E T5g, T5C, T5v, T5H, T5n, T5G, T5s, T5D; + { + E T5e, T5f, T5t, T5u; + T5e = T2c - T2v; + T5f = T4G - T4F; + T5g = T5e + T5f; + T5C = T5e - T5f; + T5t = T2P - T38; + T5u = T4T - T4K; + T5v = T5t + T5u; + T5H = T5u - T5t; + { + E T5j, T5q, T5m, T5r; + { + E T5h, T5i, T5k, T5l; + T5h = T3j - T3C; + T5i = T3P - T3G; + T5j = FNMS(KP555570233, T5i, KP831469612 * T5h); + T5q = FMA(KP555570233, T5h, KP831469612 * T5i); + T5k = T40 - T4j; + T5l = T4w - T4n; + T5m = FMA(KP831469612, T5k, KP555570233 * T5l); + T5r = FNMS(KP555570233, T5k, KP831469612 * T5l); + } + T5n = T5j + T5m; + T5G = T5m - T5j; + T5s = T5q + T5r; + T5D = T5q - T5r; + } + } + { + E T5o, T5w, T5d, T5p; + T5o = T5g + T5n; + T5w = T5s + T5v; + T5d = W[10]; + T5p = W[11]; + rio[WS(ios, 6)] = FNMS(T5p, T5w, T5d * T5o); + iio[-WS(ios, 57)] = FMA(T5p, T5o, T5d * T5w); + } + { + E T5K, T5M, T5J, T5L; + T5K = T5C - T5D; + T5M = T5H - T5G; + T5J = W[42]; + T5L = W[43]; + rio[WS(ios, 22)] = FNMS(T5L, T5M, T5J * T5K); + iio[-WS(ios, 41)] = FMA(T5L, T5K, T5J * T5M); + } + { + E T5y, T5A, T5x, T5z; + T5y = T5g - T5n; + T5A = T5v - T5s; + T5x = W[74]; + T5z = W[75]; + rio[WS(ios, 38)] = FNMS(T5z, T5A, T5x * T5y); + iio[-WS(ios, 25)] = FMA(T5z, T5y, T5x * T5A); + } + { + E T5E, T5I, T5B, T5F; + T5E = T5C + T5D; + T5I = T5G + T5H; + T5B = W[106]; + T5F = W[107]; + rio[WS(ios, 54)] = FNMS(T5F, T5I, T5B * T5E); + iio[-WS(ios, 9)] = FMA(T5F, T5E, T5B * T5I); + } + } + { + E T5Y, T6w, T6p, T6B, T6d, T6A, T6i, T6x; + { + E T5Q, T5X, T6l, T6o; + T5Q = T5O + T5P; + T5X = T5T + T5W; + T5Y = T5Q + T5X; + T6w = T5Q - T5X; + T6l = T6j + T6k; + T6o = T6m + T6n; + T6p = T6l + T6o; + T6B = T6o - T6l; + { + E 
T65, T6g, T6c, T6h; + { + E T61, T64, T68, T6b; + T61 = T5Z + T60; + T64 = T62 + T63; + T65 = FNMS(KP195090322, T64, KP980785280 * T61); + T6g = FMA(KP195090322, T61, KP980785280 * T64); + T68 = T66 + T67; + T6b = T69 + T6a; + T6c = FMA(KP980785280, T68, KP195090322 * T6b); + T6h = FNMS(KP195090322, T68, KP980785280 * T6b); + } + T6d = T65 + T6c; + T6A = T6c - T65; + T6i = T6g + T6h; + T6x = T6g - T6h; + } + } + { + E T6e, T6q, T5N, T6f; + T6e = T5Y + T6d; + T6q = T6i + T6p; + T5N = W[2]; + T6f = W[3]; + rio[WS(ios, 2)] = FNMS(T6f, T6q, T5N * T6e); + iio[-WS(ios, 61)] = FMA(T6f, T6e, T5N * T6q); + } + { + E T6E, T6G, T6D, T6F; + T6E = T6w - T6x; + T6G = T6B - T6A; + T6D = W[34]; + T6F = W[35]; + rio[WS(ios, 18)] = FNMS(T6F, T6G, T6D * T6E); + iio[-WS(ios, 45)] = FMA(T6F, T6E, T6D * T6G); + } + { + E T6s, T6u, T6r, T6t; + T6s = T5Y - T6d; + T6u = T6p - T6i; + T6r = W[66]; + T6t = W[67]; + rio[WS(ios, 34)] = FNMS(T6t, T6u, T6r * T6s); + iio[-WS(ios, 29)] = FMA(T6t, T6s, T6r * T6u); + } + { + E T6y, T6C, T6v, T6z; + T6y = T6w + T6x; + T6C = T6A + T6B; + T6v = W[98]; + T6z = W[99]; + rio[WS(ios, 50)] = FNMS(T6z, T6C, T6v * T6y); + iio[-WS(ios, 13)] = FMA(T6z, T6y, T6v * T6C); + } + } + { + E TdO, Tf1, Teq, TeH, Tef, TeW, Ten, TeM, Te3, Ter, Te8, Tem, TeE, Tf0, TeP; + E TeX; + { + E TdG, TeG, TdN, TeF, TdJ, TdM; + TdG = TdE + TdF; + TeG = Ted - Tec; + TdJ = FNMS(KP555570233, TdI, KP831469612 * TdH); + TdM = FMA(KP831469612, TdK, KP555570233 * TdL); + TdN = TdJ + TdM; + TeF = TdM - TdJ; + TdO = TdG + TdN; + Tf1 = TeG - TeF; + Teq = TdG - TdN; + TeH = TeF + TeG; + } + { + E Tee, TeK, Teb, TeL, Te9, Tea; + Tee = Tec + Ted; + TeK = TdE - TdF; + Te9 = FMA(KP555570233, TdH, KP831469612 * TdI); + Tea = FNMS(KP555570233, TdK, KP831469612 * TdL); + Teb = Te9 + Tea; + TeL = Te9 - Tea; + Tef = Teb + Tee; + TeW = TeK - TeL; + Ten = Tee - Teb; + TeM = TeK + TeL; + } + { + E TdV, Te6, Te2, Te7; + { + E TdR, TdU, TdY, Te1; + TdR = TdP + TdQ; + TdU = TdS + TdT; + TdV = 
FNMS(KP290284677, TdU, KP956940335 * TdR); + Te6 = FMA(KP290284677, TdR, KP956940335 * TdU); + TdY = TdW + TdX; + Te1 = TdZ + Te0; + Te2 = FMA(KP956940335, TdY, KP290284677 * Te1); + Te7 = FNMS(KP290284677, TdY, KP956940335 * Te1); + } + Te3 = TdV + Te2; + Ter = Te6 - Te7; + Te8 = Te6 + Te7; + Tem = Te2 - TdV; + } + { + E TeA, TeN, TeD, TeO; + { + E Tey, Tez, TeB, TeC; + Tey = TdT - TdS; + Tez = TdP - TdQ; + TeA = FNMS(KP471396736, Tez, KP881921264 * Tey); + TeN = FMA(KP881921264, Tez, KP471396736 * Tey); + TeB = TdW - TdX; + TeC = Te0 - TdZ; + TeD = FMA(KP471396736, TeB, KP881921264 * TeC); + TeO = FNMS(KP471396736, TeC, KP881921264 * TeB); + } + TeE = TeA + TeD; + Tf0 = TeN - TeO; + TeP = TeN + TeO; + TeX = TeD - TeA; + } + { + E Te4, Teg, TdD, Te5; + Te4 = TdO + Te3; + Teg = Te8 + Tef; + TdD = W[120]; + Te5 = W[121]; + iio[-WS(ios, 2)] = FMA(TdD, Te4, Te5 * Teg); + rio[WS(ios, 61)] = FNMS(Te5, Te4, TdD * Teg); + } + { + E TeY, Tf2, TeV, TeZ; + TeY = TeW + TeX; + Tf2 = Tf0 + Tf1; + TeV = W[104]; + TeZ = W[105]; + iio[-WS(ios, 10)] = FMA(TeV, TeY, TeZ * Tf2); + rio[WS(ios, 53)] = FNMS(TeZ, TeY, TeV * Tf2); + } + { + E Tf4, Tf6, Tf3, Tf5; + Tf4 = Tf1 - Tf0; + Tf6 = TeW - TeX; + Tf3 = W[40]; + Tf5 = W[41]; + rio[WS(ios, 21)] = FNMS(Tf5, Tf6, Tf3 * Tf4); + iio[-WS(ios, 42)] = FMA(Tf3, Tf6, Tf5 * Tf4); + } + { + E Tei, Tek, Teh, Tej; + Tei = Tef - Te8; + Tek = TdO - Te3; + Teh = W[56]; + Tej = W[57]; + rio[WS(ios, 29)] = FNMS(Tej, Tek, Teh * Tei); + iio[-WS(ios, 34)] = FMA(Teh, Tek, Tej * Tei); + } + { + E Teo, Tes, Tel, Tep; + Teo = Tem + Ten; + Tes = Teq + Ter; + Tel = W[24]; + Tep = W[25]; + rio[WS(ios, 13)] = FNMS(Tep, Tes, Tel * Teo); + iio[-WS(ios, 50)] = FMA(Tel, Tes, Tep * Teo); + } + { + E TeI, TeQ, Tex, TeJ; + TeI = TeE + TeH; + TeQ = TeM + TeP; + Tex = W[8]; + TeJ = W[9]; + rio[WS(ios, 5)] = FNMS(TeJ, TeQ, Tex * TeI); + iio[-WS(ios, 58)] = FMA(Tex, TeQ, TeJ * TeI); + } + { + E TeS, TeU, TeR, TeT; + TeS = TeM - TeP; + TeU = TeH - TeE; + TeR = W[72]; + TeT = 
W[73]; + iio[-WS(ios, 26)] = FMA(TeR, TeS, TeT * TeU); + rio[WS(ios, 37)] = FNMS(TeT, TeS, TeR * TeU); + } + { + E Teu, Tew, Tet, Tev; + Teu = Teq - Ter; + Tew = Ten - Tem; + Tet = W[88]; + Tev = W[89]; + iio[-WS(ios, 18)] = FMA(Tet, Teu, Tev * Tew); + rio[WS(ios, 45)] = FNMS(Tev, Teu, Tet * Tew); + } + } + { + E Tcr, Tdw, TcX, Td6, TcI, Tdt, TcS, Tdl, Tbm, TcW, TcL, TcT, Tdd, Tdx, Tdi; + E Tds; + { + E Tcq, Td4, TbZ, Td5, TbF, TbY; + Tcq = Tce + Tcp; + Td4 = TcA - TcD; + TbF = FNMS(KP195090322, TbE, KP980785280 * Tbx); + TbY = FMA(KP195090322, TbQ, KP980785280 * TbX); + TbZ = TbF + TbY; + Td5 = TbY - TbF; + Tcr = TbZ + Tcq; + Tdw = Td4 - Td5; + TcX = Tcq - TbZ; + Td6 = Td4 + Td5; + } + { + E TcE, Tdk, TcH, Tdj, TcF, TcG; + TcE = TcA + TcD; + Tdk = Tcp - Tce; + TcF = FMA(KP980785280, TbE, KP195090322 * Tbx); + TcG = FNMS(KP195090322, TbX, KP980785280 * TbQ); + TcH = TcF + TcG; + Tdj = TcF - TcG; + TcI = TcE + TcH; + Tdt = Tdk - Tdj; + TcS = TcE - TcH; + Tdl = Tdj + Tdk; + } + { + E TaI, TcJ, Tbl, TcK; + { + E Taw, TaH, Tb9, Tbk; + Taw = Tak + Tav; + TaH = TaD + TaG; + TaI = FNMS(KP098017140, TaH, KP995184726 * Taw); + TcJ = FMA(KP995184726, TaH, KP098017140 * Taw); + Tb9 = TaT + Tb8; + Tbk = Tbc + Tbj; + Tbl = FMA(KP098017140, Tb9, KP995184726 * Tbk); + TcK = FNMS(KP098017140, Tbk, KP995184726 * Tb9); + } + Tbm = TaI + Tbl; + TcW = TcJ - TcK; + TcL = TcJ + TcK; + TcT = Tbl - TaI; + } + { + E Td9, Tdg, Tdc, Tdh; + { + E Td7, Td8, Tda, Tdb; + Td7 = TaD - TaG; + Td8 = Tav - Tak; + Td9 = FNMS(KP634393284, Td8, KP773010453 * Td7); + Tdg = FMA(KP634393284, Td7, KP773010453 * Td8); + Tda = TaT - Tb8; + Tdb = Tbj - Tbc; + Tdc = FMA(KP773010453, Tda, KP634393284 * Tdb); + Tdh = FNMS(KP634393284, Tda, KP773010453 * Tdb); + } + Tdd = Td9 + Tdc; + Tdx = Tdg - Tdh; + Tdi = Tdg + Tdh; + Tds = Tdc - Td9; + } + { + E Tcs, TcM, Ta5, Tct; + Tcs = Tbm + Tcr; + TcM = TcI + TcL; + Ta5 = W[0]; + Tct = W[1]; + rio[WS(ios, 1)] = FNMS(Tct, TcM, Ta5 * Tcs); + iio[-WS(ios, 62)] = FMA(Ta5, 
TcM, Tct * Tcs); + } + { + E Tdu, Tdy, Tdr, Tdv; + Tdu = Tds + Tdt; + Tdy = Tdw + Tdx; + Tdr = W[16]; + Tdv = W[17]; + rio[WS(ios, 9)] = FNMS(Tdv, Tdy, Tdr * Tdu); + iio[-WS(ios, 54)] = FMA(Tdr, Tdy, Tdv * Tdu); + } + { + E TdA, TdC, Tdz, TdB; + TdA = Tdw - Tdx; + TdC = Tdt - Tds; + Tdz = W[80]; + TdB = W[81]; + iio[-WS(ios, 22)] = FMA(Tdz, TdA, TdB * TdC); + rio[WS(ios, 41)] = FNMS(TdB, TdA, Tdz * TdC); + } + { + E TcO, TcQ, TcN, TcP; + TcO = TcI - TcL; + TcQ = Tcr - Tbm; + TcN = W[64]; + TcP = W[65]; + iio[-WS(ios, 30)] = FMA(TcN, TcO, TcP * TcQ); + rio[WS(ios, 33)] = FNMS(TcP, TcO, TcN * TcQ); + } + { + E TcU, TcY, TcR, TcV; + TcU = TcS + TcT; + TcY = TcW + TcX; + TcR = W[96]; + TcV = W[97]; + iio[-WS(ios, 14)] = FMA(TcR, TcU, TcV * TcY); + rio[WS(ios, 49)] = FNMS(TcV, TcU, TcR * TcY); + } + { + E Tde, Tdm, Td3, Tdf; + Tde = Td6 + Tdd; + Tdm = Tdi + Tdl; + Td3 = W[112]; + Tdf = W[113]; + iio[-WS(ios, 6)] = FMA(Td3, Tde, Tdf * Tdm); + rio[WS(ios, 57)] = FNMS(Tdf, Tde, Td3 * Tdm); + } + { + E Tdo, Tdq, Tdn, Tdp; + Tdo = Tdl - Tdi; + Tdq = Td6 - Tdd; + Tdn = W[48]; + Tdp = W[49]; + rio[WS(ios, 25)] = FNMS(Tdp, Tdq, Tdn * Tdo); + iio[-WS(ios, 38)] = FMA(Tdn, Tdq, Tdp * Tdo); + } + { + E Td0, Td2, TcZ, Td1; + Td0 = TcX - TcW; + Td2 = TcS - TcT; + TcZ = W[32]; + Td1 = W[33]; + rio[WS(ios, 17)] = FNMS(Td1, Td2, TcZ * Td0); + iio[-WS(ios, 46)] = FMA(TcZ, Td2, Td1 * Td0); + } + } + { + E Tfy, Thd, TgC, TgT, Tgr, Th8, Tgz, TgY, Tgb, TgD, Tgg, Tgy, TgQ, Thc, Th1; + E Th9; + { + E Tfi, TgS, Tfx, TgR, Tfp, Tfw; + Tfi = Tfa + Tfh; + TgS = Tgp - Tgm; + Tfp = FNMS(KP195090322, Tfo, KP980785280 * Tfl); + Tfw = FMA(KP980785280, Tfs, KP195090322 * Tfv); + Tfx = Tfp + Tfw; + TgR = Tfw - Tfp; + Tfy = Tfi + Tfx; + Thd = TgS - TgR; + TgC = Tfi - Tfx; + TgT = TgR + TgS; + } + { + E Tgq, TgW, Tgj, TgX, Tgh, Tgi; + Tgq = Tgm + Tgp; + TgW = Tfa - Tfh; + Tgh = FMA(KP195090322, Tfl, KP980785280 * Tfo); + Tgi = FNMS(KP195090322, Tfs, KP980785280 * Tfv); + Tgj = Tgh + Tgi; + TgX = Tgh - Tgi; 
+ Tgr = Tgj + Tgq; + Th8 = TgW - TgX; + Tgz = Tgq - Tgj; + TgY = TgW + TgX; + } + { + E TfR, Tge, Tga, Tgf; + { + E TfJ, TfQ, Tg2, Tg9; + TfJ = TfB + TfI; + TfQ = TfM + TfP; + TfR = FNMS(KP098017140, TfQ, KP995184726 * TfJ); + Tge = FMA(KP098017140, TfJ, KP995184726 * TfQ); + Tg2 = TfU + Tg1; + Tg9 = Tg5 + Tg8; + Tga = FMA(KP995184726, Tg2, KP098017140 * Tg9); + Tgf = FNMS(KP098017140, Tg2, KP995184726 * Tg9); + } + Tgb = TfR + Tga; + TgD = Tge - Tgf; + Tgg = Tge + Tgf; + Tgy = Tga - TfR; + } + { + E TgM, TgZ, TgP, Th0; + { + E TgK, TgL, TgN, TgO; + TgK = TfP - TfM; + TgL = TfB - TfI; + TgM = FNMS(KP634393284, TgL, KP773010453 * TgK); + TgZ = FMA(KP773010453, TgL, KP634393284 * TgK); + TgN = TfU - Tg1; + TgO = Tg8 - Tg5; + TgP = FMA(KP634393284, TgN, KP773010453 * TgO); + Th0 = FNMS(KP634393284, TgO, KP773010453 * TgN); + } + TgQ = TgM + TgP; + Thc = TgZ - Th0; + Th1 = TgZ + Th0; + Th9 = TgP - TgM; + } + { + E Tgc, Tgs, Tf7, Tgd; + Tgc = Tfy + Tgb; + Tgs = Tgg + Tgr; + Tf7 = W[124]; + Tgd = W[125]; + iio[0] = FMA(Tf7, Tgc, Tgd * Tgs); + rio[WS(ios, 63)] = FNMS(Tgd, Tgc, Tf7 * Tgs); + } + { + E Tha, The, Th7, Thb; + Tha = Th8 + Th9; + The = Thc + Thd; + Th7 = W[108]; + Thb = W[109]; + iio[-WS(ios, 8)] = FMA(Th7, Tha, Thb * The); + rio[WS(ios, 55)] = FNMS(Thb, Tha, Th7 * The); + } + { + E Thg, Thi, Thf, Thh; + Thg = Thd - Thc; + Thi = Th8 - Th9; + Thf = W[44]; + Thh = W[45]; + rio[WS(ios, 23)] = FNMS(Thh, Thi, Thf * Thg); + iio[-WS(ios, 40)] = FMA(Thf, Thi, Thh * Thg); + } + { + E Tgu, Tgw, Tgt, Tgv; + Tgu = Tgr - Tgg; + Tgw = Tfy - Tgb; + Tgt = W[60]; + Tgv = W[61]; + rio[WS(ios, 31)] = FNMS(Tgv, Tgw, Tgt * Tgu); + iio[-WS(ios, 32)] = FMA(Tgt, Tgw, Tgv * Tgu); + } + { + E TgA, TgE, Tgx, TgB; + TgA = Tgy + Tgz; + TgE = TgC + TgD; + Tgx = W[28]; + TgB = W[29]; + rio[WS(ios, 15)] = FNMS(TgB, TgE, Tgx * TgA); + iio[-WS(ios, 48)] = FMA(Tgx, TgE, TgB * TgA); + } + { + E TgU, Th2, TgJ, TgV; + TgU = TgQ + TgT; + Th2 = TgY + Th1; + TgJ = W[12]; + TgV = W[13]; + rio[WS(ios, 
7)] = FNMS(TgV, Th2, TgJ * TgU); + iio[-WS(ios, 56)] = FMA(TgJ, Th2, TgV * TgU); + } + { + E Th4, Th6, Th3, Th5; + Th4 = TgY - Th1; + Th6 = TgT - TgQ; + Th3 = W[76]; + Th5 = W[77]; + iio[-WS(ios, 24)] = FMA(Th3, Th4, Th5 * Th6); + rio[WS(ios, 39)] = FNMS(Th5, Th4, Th3 * Th6); + } + { + E TgG, TgI, TgF, TgH; + TgG = TgC - TgD; + TgI = Tgz - Tgy; + TgF = W[92]; + TgH = W[93]; + iio[-WS(ios, 16)] = FMA(TgF, TgG, TgH * TgI); + rio[WS(ios, 47)] = FNMS(TgH, TgG, TgF * TgI); + } + } + { + E ThJ, TiG, Ti7, Tig, ThS, TiD, Ti2, Tiv, Thy, Ti6, ThV, Ti3, Tin, TiH, Tis; + E TiC; + { + E ThI, Tie, ThF, Tif, ThB, ThE; + ThI = ThG + ThH; + Tie = ThM - ThN; + ThB = FNMS(KP555570233, ThA, KP831469612 * Thz); + ThE = FNMS(KP555570233, ThD, KP831469612 * ThC); + ThF = ThB + ThE; + Tif = ThE - ThB; + ThJ = ThF + ThI; + TiG = Tie - Tif; + Ti7 = ThI - ThF; + Tig = Tie + Tif; + } + { + E ThO, Tiu, ThR, Tit, ThP, ThQ; + ThO = ThM + ThN; + Tiu = ThH - ThG; + ThP = FMA(KP831469612, ThA, KP555570233 * Thz); + ThQ = FMA(KP831469612, ThD, KP555570233 * ThC); + ThR = ThP - ThQ; + Tit = ThP + ThQ; + ThS = ThO + ThR; + TiD = Tiu - Tit; + Ti2 = ThO - ThR; + Tiv = Tit + Tiu; + } + { + E Thq, ThT, Thx, ThU; + { + E Thm, Thp, Tht, Thw; + Thm = Thk + Thl; + Thp = Thn + Tho; + Thq = FNMS(KP290284677, Thp, KP956940335 * Thm); + ThT = FMA(KP956940335, Thp, KP290284677 * Thm); + Tht = Thr - Ths; + Thw = Thu + Thv; + Thx = FMA(KP290284677, Tht, KP956940335 * Thw); + ThU = FNMS(KP290284677, Thw, KP956940335 * Tht); + } + Thy = Thq + Thx; + Ti6 = ThT - ThU; + ThV = ThT + ThU; + Ti3 = Thx - Thq; + } + { + E Tij, Tiq, Tim, Tir; + { + E Tih, Tii, Tik, Til; + Tih = Thn - Tho; + Tii = Thl - Thk; + Tij = FNMS(KP471396736, Tii, KP881921264 * Tih); + Tiq = FMA(KP471396736, Tih, KP881921264 * Tii); + Tik = Thv - Thu; + Til = Ths + Thr; + Tim = FNMS(KP881921264, Til, KP471396736 * Tik); + Tir = FMA(KP471396736, Til, KP881921264 * Tik); + } + Tin = Tij + Tim; + TiH = Tiq - Tir; + Tis = Tiq + Tir; + TiC = Tim - Tij; + } 
+ /* Each braced group in this tail forms two sums/differences, reads one pair of adjacent W entries, and stores one rio element and one iio element through FMA/FNMS. */ { + E ThK, ThW, Thj, ThL; + ThK = Thy + ThJ; + ThW = ThS + ThV; + Thj = W[4]; + ThL = W[5]; + rio[WS(ios, 3)] = FNMS(ThL, ThW, Thj * ThK); + iio[-WS(ios, 60)] = FMA(Thj, ThW, ThL * ThK); + } + { + E TiE, TiI, TiB, TiF; + TiE = TiC + TiD; + TiI = TiG + TiH; + TiB = W[20]; + TiF = W[21]; + rio[WS(ios, 11)] = FNMS(TiF, TiI, TiB * TiE); + iio[-WS(ios, 52)] = FMA(TiB, TiI, TiF * TiE); + } + { + E TiK, TiM, TiJ, TiL; + TiK = TiG - TiH; + TiM = TiD - TiC; + TiJ = W[84]; + TiL = W[85]; + iio[-WS(ios, 20)] = FMA(TiJ, TiK, TiL * TiM); + rio[WS(ios, 43)] = FNMS(TiL, TiK, TiJ * TiM); + } + { + E ThY, Ti0, ThX, ThZ; + ThY = ThS - ThV; + Ti0 = ThJ - Thy; + ThX = W[68]; + ThZ = W[69]; + iio[-WS(ios, 28)] = FMA(ThX, ThY, ThZ * Ti0); + rio[WS(ios, 35)] = FNMS(ThZ, ThY, ThX * Ti0); + } + { + E Ti4, Ti8, Ti1, Ti5; + Ti4 = Ti2 + Ti3; + Ti8 = Ti6 + Ti7; + Ti1 = W[100]; + Ti5 = W[101]; + iio[-WS(ios, 12)] = FMA(Ti1, Ti4, Ti5 * Ti8); + rio[WS(ios, 51)] = FNMS(Ti5, Ti4, Ti1 * Ti8); + } + { + E Tio, Tiw, Tid, Tip; + Tio = Tig + Tin; + Tiw = Tis + Tiv; + Tid = W[116]; + Tip = W[117]; + iio[-WS(ios, 4)] = FMA(Tid, Tio, Tip * Tiw); + rio[WS(ios, 59)] = FNMS(Tip, Tio, Tid * Tiw); + } + { + E Tiy, TiA, Tix, Tiz; + Tiy = Tiv - Tis; + TiA = Tig - Tin; + Tix = W[52]; + Tiz = W[53]; + rio[WS(ios, 27)] = FNMS(Tiz, TiA, Tix * Tiy); + iio[-WS(ios, 36)] = FMA(Tix, TiA, Tiz * Tiy); + } + { + E Tia, Tic, Ti9, Tib; + Tia = Ti7 - Ti6; + Tic = Ti2 - Ti3; + Ti9 = W[36]; + Tib = W[37]; + rio[WS(ios, 19)] = FNMS(Tib, Tic, Ti9 * Tia); + iio[-WS(ios, 44)] = FMA(Ti9, Tic, Tib * Tia); + } + } + } + /* hb_64 hands the twiddle pointer W back to its caller. */ return W; +} + /* Twiddle-instruction table referenced by the descriptor: a TW_FULL entry with arguments (0, 64) followed by a TW_NEXT entry with arguments (1, 0). NOTE(review): presumably this requests the full twiddle set for a size-64 transform and then ends the list -- confirm against the tw_instr definition. */ +static const tw_instr twinstr[] = { + {TW_FULL, 0, 64}, + {TW_NEXT, 1, 0} +}; + /* Codelet descriptor: size 64, name "hb_64", the twinstr table, and the operation counts {808, 270, 230, 0}; the first three match the additions / multiplications / fused multiply-add totals quoted in the generator comment at the top of this file. GENUS and the three trailing zeros are defined outside this file (GENUS presumably comes from hb.h -- confirm). */ +static const hc2hc_desc desc = { 64, "hb_64", twinstr, {808, 270, 230, 0}, &GENUS, 0, 0, 0 }; + /* Registration entry point: passes hb_64 and the address of desc to X(khc2hc_dif_register) for planner p. */ +void X(codelet_hb_64) (planner *p) { + X(khc2hc_dif_register) (p, hb_64, &desc); +} |